Example No. 1
 def initiate_selenium(self):
     scream.say('Initiating selenium...')
     self.display = Display(visible=0, size=(800, 600))
     self.display.start()
     self.browser = webdriver.Firefox()
     self.browser.implicitly_wait(15)
     scream.say('Selenium ready for action')
Example No. 2
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    how_long = 60
    if gotten_tag is None:
        #retry 3 times
        for i in range(0, 3):
            time.sleep(how_long)
            how_long *= 3

            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = 'Socket error while retrieving HTML' +\
                                        ' file from GitHub! Internet or ' +\
                                        'GitHub down? Retry after 1 minute'
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)

            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # tag finally retrieved; stop retrying
                break
        if gotten_tag is None:
            # still no tag after all retries; give up and move on
            scream.ssay('orphaned ' + filename + '.json')
            scream.log_error(
                filename + '.json' + ' is without proper html. ' +
                'GitHub not responding or giving a 404/501 error?')
            return None
    scream.say(
        'No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
Example No. 3
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    how_long = 60
    if gotten_tag is None:
        #retry 3 times
        for i in range(0, 3):
            time.sleep(how_long)
            how_long *= 3

            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = 'Socket error while retrieving HTML' +\
                                        ' file from GitHub! Internet or ' +\
                                        'GitHub down? Retry after 1 minute'
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)

            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # tag finally retrieved; stop retrying
                break
        if gotten_tag is None:
            # still no tag after all retries; give up and move on
            scream.ssay('orphaned ' + filename + '.json')
            scream.log_error(filename + '.json' + ' is without proper html. ' +
                             'GitHub not responding or giving a 404/501 error?')
            return None
    scream.say('No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
 def initiate_selenium(self):
     scream.say('Initiating selenium...')
     self.display = Display(visible=0, size=(800, 600))
     self.display.start()
     self.browser = webdriver.Firefox()
     self.browser.implicitly_wait(15)
     scream.say('Selenium ready for action')
 def cleanup(self):
     scream.say('Marking thread on ' + str(self.threadId) + "/" + str(self.page) + ' as definitely finished..')
     self.finished = True
     try:
         self.conn.close()
     except:
         scream.log('MySQL connection instance already closed. Ok.')
     scream.say('Terminating/join() thread on ' + str(self.threadId) + ' ...')
 def __init__(self, threadId, page):
     scream.say('Initiating GeneralGetter, running __init__ procedure.')
     self.threadId = threadId
     threading.Thread.__init__(self)
     self.daemon = True
     self.finished = False
     self.page = page
     self.conn = MSQL.connect(host="10.4.4.3", port=3306, user=open('mysqlu.dat', 'r').read(),
                              passwd=open('mysqlp.dat', 'r').read(), db="github", connect_timeout=50000000)
 def __init__(self, threadId, repository, repo, github_client):
     scream.say('Initiating GeneralGetter, running __init__ procedure.')
     self.threadId = threadId
     threading.Thread.__init__(self)
     self.daemon = True
     self.finished = False
     self.repository = repository
     self.repo = repo
     self.github_client = github_client
 def __init__(self, threadId, repository, repo, github_client):
     scream.say('Initiating GeneralGetter, running __init__ procedure.')
     self.threadId = threadId
     threading.Thread.__init__(self)
     self.daemon = True
     self.finished = False
     self.repository = repository
     self.repo = repo
     self.github_client = github_client
    def get_data(self):
        global resume_stage

        scream.say('Executing inside-thread method get_data() for: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            '1. Team size of a repository'
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                while True:
                    scream.say('Inside while True: (line 674)')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + key + ', error({0})'.
                                         format(str(e)), True)
                        repos_reported_execution_error.write(key + os.linesep)
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
        self.cleanup()
Example No. 10
    def get_data(self):
        global resume_stage

        scream.say('Preparing to build list of programmers: ' + str(self.threadId))
        if resume_stage in [None, 'contributors']:
            #try:
            scream.ssay('Checking size of a ' + str(self.repo.getKey()) + ' team')
            '1. Team size of a repository'
            self.contributors = self.repository.get_contributors()
            assert self.contributors is not None

            self.repo_contributors = set()
            self.contributors_static = self.build_list_of_programmers(self.contributors, self.repo.getKey(), self.repository)
            for contributor in self.contributors_static.items():
                scream.log_debug('move with contributor to next from contributors_static.items()', True)
                while True:
                    scream.say('Get details for a contributor..')
                    try:
                        self.contributor_login = contributor[0]
                        self.contributor_object = contributor[1]
                        scream.say(str(self.contributor_login))
                        self.repo_contributors.add(self.contributor_login)
                        scream.say(str(self.repo_contributors))
                        #developer_revealed(threading.current_thread(), self.repository, self.repo, self.contributor_object)
                        developer_revealed(self.threadId, self.repository, self.repo, self.contributor_object)
                        scream.say('Finished revealing developer')
                        break
                    except TypeError as e:
                        scream.log_error('Repo + Contributor TypeError, or paginated through' +
                                         ' contributors gave error. ' + key + ', error({0})'.
                                         format(str(e)), True)
                        repos_reported_execution_error.write(key + os.linesep)
                        if force_raise:
                            raise
                        #break
                    except socket.timeout as e:
                        scream.log_error('Timeout while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze('socket.timeout in paginate through x contributors')
                        if force_raise:
                            raise
                        #break
                    except Exception as e:
                        scream.log_error('Exception while revealing details.. ' +
                                         ', error({0})'.format(str(e)), True)
                        freeze(str(e) + ' in paginate through x contributors')
                        if force_raise:
                            raise
                        #break

            assert self.repo_contributors is not None
            self.repo.setContributors(self.repo_contributors)
            self.repo.setContributorsCount(len(self.repo_contributors))
            scream.log('Added contributors of count: ' + str(len(self.repo_contributors)) + ' to a repo ' + key)
        self.cleanup()
Example No. 11
def retry_json(how_many_times, wait_time):
    while how_many_times > 0:
        scream.say('JSON retrieved too small! Retrying ' + str(how_many_times) + ' more times.')
        how_many_times -= 1
        time.sleep(wait_time)
        # simplejson.load() expects a file-like object, so open the URL first
        json = simplejson.load(urllib.urlopen(key + '?client_id='
                                              + client_id + '&client_secret='
                                              + client_secret))
        if (len(json) >= 2):
            if not (('message' in json) and (json['message'].startswith('API rate limit exceeded for'))):
                # successfully got a longer JSON, stop retrying
                break
    return json
def check_quota_limit():
    global quota_check
    quota_check += 1
    if quota_check > 9:
        quota_check = 0
        limit = gh.get_rate_limit()
        scream.say('Rate limit: ' + str(limit.rate.limit) +
                   ' remaining: ' + str(limit.rate.remaining))
        reset_time = gh.rate_limiting_resettime
        scream.say('Rate limit reset time: ' + str(reset_time))

        if limit.rate.remaining < 10:
            freeze()
Example No. 13
 def cleanup(self):
     # try:
     #     self.browser.close()
     #     self.browser.quit()
     #     self.display.stop()
     #     self.display.popen.kill()
     # except:
     #     scream.say('Did my best to clean up after selenium and pyvirtualdisplay')
     #     if force_raise:
     #         raise
     scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
     self.finished = True
     scream.say('Terminating thread on ' + self.repo.getKey() + ' ...')
     self.terminate()
Example No. 14
 def cleanup(self):
     # try:
     #     self.browser.close()
     #     self.browser.quit()
     #     self.display.stop()
     #     self.display.popen.kill()
     # except:
     #     scream.say('Did my best to clean up after selenium and pyvirtualdisplay')
     #     if force_raise:
     #         raise
     scream.say('Marking thread on ' + self.repo.getKey() +
                ' as finished..')
     self.finished = True
     scream.say('Terminating thread on ' + self.repo.getKey() + ' ...')
     self.terminate()
Example No. 15
def check_quota_limit():
    global github_client
    limit = github_client.get_rate_limit()
    found_hope = False
    if limit.rate.remaining < 10:
        for quota_hope in github_clients:
            limit_hope = quota_hope.get_rate_limit()
            if limit_hope.rate.remaining > 9:
                github_client = quota_hope
                found_hope = True
                break
        if not found_hope:
            freeze()
    else:
        scream.say("Limit ok, " + str(limit.rate.remaining) + ' left.')
Example No. 16
def check_quota_limit():
    global github_client
    limit = github_client.get_rate_limit()
    found_hope = False
    if limit.rate.remaining < 10:
        for quota_hope in github_clients:
            limit_hope = quota_hope.get_rate_limit()
            if limit_hope.rate.remaining > 9:
                github_client = quota_hope
                found_hope = True
                break
        if not found_hope:
            freeze()
    else:
        scream.say("Limit ok, " + str(limit.rate.remaining) + ' left.')
Example No. 17
def retry_json(how_many_times, wait_time):
    while how_many_times > 0:
        scream.say('JSON retrieved too small! Retrying ' + str(how_many_times) +
                   ' more times.')
        how_many_times -= 1
        time.sleep(wait_time)
        # simplejson.load() expects a file-like object, so open the URL first
        json = simplejson.load(urllib.urlopen(key + '?client_id=' + client_id +
                                              '&client_secret=' + client_secret))
        if (len(json) >= 2):
            if not (
                ('message' in json) and
                (json['message'].startswith('API rate limit exceeded for'))):
                # successfully got a longer JSON, stop retrying
                break
    return json
Example No. 18
    def cleanup(self):
        global use_splinter

        if use_splinter:
            scream.say("Cleanup of splinter")
            try:
                self.splinter__browser.quit()
            except:
                scream.say('Did my best to clean up')
        else:
            try:
                self.browser.close()
                self.browser.quit()
                self.display.stop()
                self.display.popen.kill()
            except:
                scream.say('Did my best to clean up after selenium and pyvirtualdisplay')
                if force_raise:
                    raise
        scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
        self.finished = True
        scream.say('Terminating thread on ' + self.repo.getKey() + ' ...')
        self.terminate()
Example No. 19
def developer_revealed(repository, repo, contributor, result_writer):
    #repository = github object, repo = my class object, contributor = nameduser
    scream.say('assigning a contributor: ' + str(contributor) +
               ' to a repo: ' + str(repository) + ' and mock object ' +
               str(repo))
    login = contributor.login
    name = contributor.name
    #1
    followers = contributor.followers
    #2
    following = contributor.following
    scream.say(following)
    scream.say(followers)
    his_repositories = contributor.get_repos()
    #3
    total_his_repositories = 0
    total_his_stars = 0
    total_his_watchers = 0
    total_his_forks = 0
    total_network_count = 0
    for his_repo in his_repositories:
        # his_repo.get_stats_contributors()
        check_quota_limit()
        total_his_repositories += 1
        total_his_forks += his_repo.forks_count
        total_his_stars += his_repo.stargazers_count
        total_his_watchers += his_repo.watchers_count
        total_network_count += his_repo.network_count
    #4
    # as far as i know - almost impossible to get
    # 5
    # blazej task, not mine
    # 6
    collaborators = contributor.collaborators
    company = contributor.company
    contributions = contributor.contributions
    created_at = contributor.created_at
    hireable = contributor.hireable
    result_writer.writerow([
        repo.getUrl(),
        repo.getName(),
        repo.getOwner(), login, (name if name is not None else ''),
        str(followers),
        str(following),
        str(collaborators), (company if company is not None else ''),
        str(contributions),
        str(created_at), (str(hireable) if hireable is not None else ''),
        str(total_his_repositories),
        str(total_his_stars),
        str(total_his_watchers),
        str(total_his_forks),
        str(total_network_count)
    ])
Example No. 20
def developer_revealed(repository, repo, contributor, result_writer):
    #repository = github object, repo = my class object, contributor = nameduser
    scream.say('assigning a contributor: ' + str(contributor) + ' to a repo: ' + str(repository) + ' and mock object ' + str(repo))
    login = contributor.login
    name = contributor.name
    #1
    followers = contributor.followers
    #2
    following = contributor.following
    scream.say(following)
    scream.say(followers)
    his_repositories = contributor.get_repos()
    #3
    total_his_repositories = 0
    total_his_stars = 0
    total_his_watchers = 0
    total_his_forks = 0
    total_network_count = 0
    for his_repo in his_repositories:
        # his_repo.get_stats_contributors()
        check_quota_limit()
        total_his_repositories += 1
        total_his_forks += his_repo.forks_count
        total_his_stars += his_repo.stargazers_count
        total_his_watchers += his_repo.watchers_count
        total_network_count += his_repo.network_count
    #4
    # as far as i know - almost impossible to get
    # 5
    # blazej task, not mine
    # 6
    collaborators = contributor.collaborators
    company = contributor.company
    contributions = contributor.contributions
    created_at = contributor.created_at
    hireable = contributor.hireable
    result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), login,
                           (name if name is not None else ''), str(followers), str(following),
                           str(collaborators), (company if company is not None else ''), str(contributions),
                           str(created_at), (str(hireable) if hireable is not None else ''), str(total_his_repositories), str(total_his_stars),
                           str(total_his_watchers), str(total_his_forks), str(total_network_count)])
 def set_finished(self, finished):
     scream.say('Marking the thread ' + str(self.threadId) +
                ' as finished..')
     self.finished = finished
Example No. 22
def make_headers(filename_for_headers):
    with open(filename_for_headers, 'ab') as output_csvfile:
        devs_head_writer = UnicodeWriter(output_csvfile) if use_utf8 else csv.writer(output_csvfile, dialect=WriterDialect)
        tempv = ('repo_url', 'repo_name', 'repo_owner', 'dev_login', 'dev_name',
                 'followers', 'following', 'collaborators', 'company', 'contributions', 'created_at', 'hireable',
                 'total_his_repositories', 'total_his_stars', 'total_his_watchers', 'total_his_forks', 'total_network_count')
        devs_head_writer.writerow(tempv)


if __name__ == "__main__":
    '''
    Starts the process of working on the CSV files which are the output of
    Google BigQuery. Whenever intelliGit.py is executed as a standalone
    program, it reads through the input and gets all data about programmers.
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo analyzer!')
    scream.say(version_name)

    secrets = []
    credential_list = []
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
                client_id__ = str(secrets[2]).strip()
                client_secret__ = str(secrets[3]).strip()
            are_working += 1
    return are_working


def num_modulo(thread_id_count__):
    global no_of_threads
    return thread_id_count__ % no_of_threads


if __name__ == "__main__":
    '''
    Starts the process of working on the CSV files which are the output of
    Google BigQuery. Whenever intelliGit.py is executed as a standalone
    program, it reads through the input and gets all data about programmers.
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo analyzer!')
    scream.say(version_name)

    secrets = []

    credential_list = []
    # read the secrets; the GitHub factory objects will be created in the next block
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
def freeze(message):
    global sleepy_head_time
    scream.say('Sleeping for ' + str(sleepy_head_time) + ' seconds. Reason: ' +
               str(message))
    time.sleep(sleepy_head_time)
Example No. 25
        tempv = ('repo_url', 'repo_name', 'repo_owner', 'dev_login',
                 'dev_name', 'followers', 'following', 'collaborators',
                 'company', 'contributions', 'created_at', 'hireable',
                 'total_his_repositories', 'total_his_stars',
                 'total_his_watchers', 'total_his_forks',
                 'total_network_count')
        devs_head_writer.writerow(tempv)


if __name__ == "__main__":
    '''
    Starts the process of working on the CSV files which are the output of
    Google BigQuery. Whenever intelliGit.py is executed as a standalone
    program, it reads through the input and gets all data about programmers.
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo analyzer!')
    scream.say(version_name)

    secrets = []
    credential_list = []
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
                client_id__ = str(secrets[2]).strip()
                client_secret__ = str(secrets[3]).strip()
Example No. 26
 def set_finished(self, finished):
     scream.say('Marking the thread ' + str(self.threadId) + ' as finished..')
     self.finished = finished
Example No. 27
                 time.sleep(60)
         local_soup = BeautifulSoup(html)
         failed = local_soup.find("span", {
             "id":
             "ctl00_ContentPlaceHolder1_" + "LabelFailedSearchedFor"
         })
         if failed is not None:
             persist_users[s] = s + ',' + fullname.strip()
             return s + ',' + fullname.strip()
         gender_tag = local_soup.find(
             "span",
             {"id": "ctl00_ContentPlaceHolder1_" + "LabelGenderFound"})
         if ((gender_tag is not None) and (gender_tag.contents is not None)
                 and (len(gender_tag.contents) > 0)):
             gender = gender_tag.contents[0].string
             scream.say(gender)
             persist_users[s] = s + ',' + fullname.strip() + ',' + gender
             return s + ',' + fullname.strip() + ',' + gender
         else:
             scream.log_warning(
                 'Something is really wrong: the result page had ' +
                 'neither a not-found label nor a proper result')
             persist_users[s] = s + ',' + fullname.strip()
             return s + ',' + fullname.strip()
     else:
         persist_users[s] = s + ',' + fullname.strip()
         return s + ',' + fullname.strip()
 else:
     scream.say('Fullname not provided')
     persist_users[s] = None
     return s
Example No. 28
def freeze(message):
    global sleepy_head_time
    scream.say('Sleeping for ' + str(sleepy_head_time) + ' seconds. Reason: ' + str(message))
    time.sleep(sleepy_head_time)
Example No. 29
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    global result_writer

    developer_login = contributor.login
    scream.log_debug(
        'Assigning a contributor: ' + str(developer_login) + ' to a repo: ' +
        str(repository.name), True)
    developer_name = contributor.name
    # 1 Number of people whom the developer follows [FollowEvent]
    developer_followers = contributor.followers
    # 2 Number of people who follow the developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5.  Number of repos he did not create but in which he is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    # 6.  Number of repos he did not create but in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions

    # - Number of projects created by him
    his_repositories = contributor.get_repos()

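    # Aggregate statistics over all of this contributor's repositories; if the
    # GitHub API fails part-way, the outer loop re-fetches get_repos() and restarts.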
    while True:
        total_his_repositories = 0
        total_his_stars = 0
        total_his_watchers = 0
        total_his_forks = 0
        total_his_has_issues = 0
        total_his_has_wiki = 0
        total_his_open_issues = 0
        total_network_count = 0
        total_his_collaborators = 0
        total_his_contributors = 0

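        # count___ selects how repository statistics are collected: 'api' uses the
        # REST endpoints, 'selenium' scrapes the web UI, anything else falls back
        # to the contributor-stats endpoint.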
        if count___ == 'selenium':
            total_his_commits = 0
            total_his_branches = 0
            total_his_releases = 0
            total_his_issues = 0
            total_his_pull_requests = 0

        try:
            for his_repo in his_repositories:

                try:
                    total_his_repositories += 1
                    total_his_forks += his_repo.forks_count
                    total_his_stars += his_repo.stargazers_count
                    total_his_watchers += his_repo.watchers_count
                    total_his_has_issues += 1 if his_repo.has_issues else 0
                    total_his_has_wiki += 1 if his_repo.has_wiki else 0
                    total_his_open_issues += his_repo.open_issues
                    total_network_count += his_repo.network_count

                    if count___ == 'api':
                        # 3 Number of developers who are in the projects he created [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
                        total_his_contributors = None
                        while True:
                            try:
                                total_his_contributors = 0
                                #total_his_contributors = his_repo.get_contributors().totalCount -- this is buggy and will make errors
                                total_his_contributors += sum(
                                    1 for temp_object in
                                    his_repo.get_contributors())
                                break
                            except:
                                freeze(
                                    'Exception in getting total_his_contributors'
                                )
                                if force_raise:
                                    raise
                        assert total_his_contributors is not None

                        # 4 Number of contributors who are in the projects he created
                        total_his_collaborators = None
                        while True:
                            try:
                                total_his_collaborators = 0
                                #total_his_collaborators = his_repo.get_collaborators().totalCount -- this is buggy and will make errors
                                total_his_collaborators += sum(
                                    1 for temp_object in
                                    his_repo.get_collaborators())
                                break
                            except:
                                freeze(
                                    'Exception in getting total_his_collaborators'
                                )
                                if force_raise:
                                    raise
                        assert total_his_collaborators is not None
                    elif count___ == 'selenium':
                        scream.say('Using selenium for thread about  ' +
                                   str(developer_login) + ' \'s repositories')
                        result = thread_getter_instance.analyze_with_selenium(
                            his_repo
                        )  # pull the statistics with selenium and return them in an array:
                        # commits, branches, releases, contributors, issues, pull requests
                        if result['status'] == '404':
                            continue
                        if result['status'] == 'EMPTY':
                            continue
                        total_his_commits += result['commits']
                        total_his_branches += result['branches']
                        total_his_releases += result['releases']
                        total_his_issues += result['issues']
                        total_his_pull_requests += result['pulls']
                        total_his_contributors += result['contributors']
                    else:  # hence it is only when not selenium is used
                        while True:
                            try:
                                his_contributors = set()
                                stats = his_repo.get_stats_contributors()
                                assert stats is not None
                                for stat in stats:
                                    if str(stat.author.login).strip() in [
                                            'None', ''
                                    ]:
                                        continue
                                    his_contributors.add(stat.author.login)
                                total_his_contributors += len(his_contributors)
                                break
                            except Exception as exc:
                                scream.log_warning(
                                    'Not ready data while revealing details.. '
                                    + ', error({0})'.format(str(exc)), True)
                                freeze(
                                    'StatsContribution not ready.. waiting for the server to provide good data'
                                )
                                if force_raise:
                                    raise
                except GithubException as e:
                    freeze(str(e) + ' in try per repo of x-dev repos')
                    if ("message"
                            in e.data) and (e.data["message"].strip()
                                            == "Repository access blocked"):
                        scream.log_debug("It is a private repo.. Skip!")
                        continue
                    if force_raise:
                        raise
            break
        except Exception as e:
            freeze(str(e) + ' in main loop of developer_revealed()')
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable

    scream.log_debug(
        'Thread ' + str(thread_getter_instance.threadId) +
        ' Finished revealing contributor: ' + str(developer_login) +
        ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    if not use_utf8:
        result_writer.writerow([
            str(repo.getUrl()),
            str(repo.getName()),
            str(repo.getOwner()),
            str(repo.getStargazersCount()),
            str(repo.getWatchersCount()),
            str(developer_login),
            (str(developer_name) if developer_name is not None else ''),
            str(developer_followers),
            str(developer_following),
            str(developer_collaborators),
            (str(company) if company is not None else ''),
            str(developer_contributions),
            str(created_at), (str(hireable) if hireable is not None else ''),
            str(total_his_repositories),
            str(total_his_stars),
            str(total_his_collaborators),
            str(total_his_contributors),
            str(total_his_watchers),
            str(total_his_forks),
            str(total_his_has_issues),
            str(total_his_has_wiki),
            str(total_his_open_issues),
            str(total_network_count),
            (str(developer_location)
             if developer_location is not None else ''),
            str(developer_total_private_repos),
            str(developer_total_public_repos),
            str(total_his_issues),
            str(total_his_pull_requests)
        ])
    else:
        result_writer.writerow([
            repo.getUrl(),
            repo.getName(),
            repo.getOwner(),
            str(repo.getStargazersCount()),
            str(repo.getWatchersCount()), developer_login,
            (developer_name if developer_name is not None else ''),
            str(developer_followers),
            str(developer_following),
            str(developer_collaborators),
            (company if company is not None else ''),
            str(developer_contributions),
            str(created_at), (str(hireable) if hireable is not None else ''),
            str(total_his_repositories),
            str(total_his_stars),
            str(total_his_collaborators),
            str(total_his_contributors),
            str(total_his_watchers),
            str(total_his_forks),
            str(total_his_has_issues),
            str(total_his_has_wiki),
            str(total_his_open_issues),
            str(total_network_count),
            (developer_location if developer_location is not None else ''),
            str(developer_total_private_repos),
            str(developer_total_public_repos),
            str(total_his_issues),
            str(total_his_pull_requests)
        ])

    scream.log_debug('Wrote row to CSV.', True)
Example No. 30
 def cleanup(self):
     scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
     self.finished = True
     scream.say('Terminating/join() thread on ' + self.repo.getKey() + ' ...')
Example No. 31
    time.sleep(sleepy_head_time)
    limit = gh.get_rate_limit()
    while limit.rate.remaining < 15:
        time.sleep(sleepy_head_time)


def freeze_more():
    freeze()


if __name__ == "__main__":
    '''
    Starts the process of working on the CSV files which are the output of
    Google BigQuery whenever intelli_git.py is executed as a standalone program.
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo getter!')
    scream.say(version_name)

    secrets = []
    credential_list = []
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
                client_id__ = str(secrets[2]).strip()
                client_secret__ = str(secrets[3]).strip()
    def get_data(self, page, conn):
        global results_done
        global results_all
        global pagination
        global openhub_query_tags

        self.params_sort_rating = urllib.urlencode({'query': 'tag:' + openhub_query_tags[0], 'api_key': return_random_openhub_key(),
                                                    'sort': 'rating', 'page': page})
        self.projects_api_url = "https://www.openhub.net/projects.xml?%s" % (self.params_sort_rating)

        self.result_flow = urllib.urlopen(self.projects_api_url)

        scream.say('')
        scream.say('-------------------------- PAGE ' + str(page) + ' parsed -----------------------------')
        scream.say('')

        # Parse the response into a structured XML object
        self.tree = ET.parse(self.result_flow)

        # Did Ohloh return an error?
        self.elem = self.tree.getroot()
        self.error = self.elem.find("error")
        if self.error is not None:
            print 'OpenHub returned ERROR:', ET.tostring(self.error),
            sys.exit()

        results_done += int(self.elem.find("items_returned").text)
        results_all = int(self.elem.find("items_available").text)

        self.i = 0
        for self.node in self.elem.findall("result/project"):
            self.i += 1
            scream.say('Checking element ' + str(self.i) + '/' + str(pagination))

            self.project_id = self.node.find("id").text
            self.project_name = self.node.find("name").text
            self.project_url = self.node.find("url").text
            self.project_htmlurl = self.node.find("html_url").text
            self.project_created_at = self.node.find("created_at").text
            self.project_updated_at = self.node.find("updated_at").text
            self.project_homepage_url = self.node.find("homepage_url").text

            self.project_average_rating = self.node.find("average_rating").text
            self.project_rating_count = self.node.find("rating_count").text
            self.project_review_count = self.node.find("review_count").text

            self.project_activity_level = self.node.find("project_activity_index/value").text

            self.project_user_count = self.node.find("user_count").text

            # project may have multiple GitHub repositories
            # or even it may be not present on GitHub - check that

            self.is_github_project = False
            self.github_repo_id = None

            # in case of multiple github CODE repositories (quite often)
            # treat as a separate repo - remember, we focus on github repositories, not aggregates

            self.enlistments_detailed_params = urllib.urlencode({'api_key': return_random_openhub_key()})
            self.enlistments_detailed_url = "https://www.openhub.net/projects/%s/enlistments.xml?%s" % (self.project_id, self.enlistments_detailed_params)

            self.enlistments_result_flow = urllib.urlopen(self.enlistments_detailed_url)

            # Parse the response into a structured XML object
            self.enlistments_tree = ET.parse(self.enlistments_result_flow)

            # Did Ohloh return an error?
            self.enlistments_elem = self.enlistments_tree.getroot()
            self.enlistments_error = self.enlistments_elem.find("error")
            if self.enlistments_error is not None:
                print 'Ohloh returned:', ET.tostring(self.enlistments_error),
                sys.exit()

            self.repos_lists = list()

            for self.enlistment_node in self.enlistments_elem.findall("result/enlistment"):
                self.ee_type = self.enlistment_node.find("repository/type").text
                if (self.ee_type == "GitRepository"):
                    self.ee_link = self.enlistment_node.find("repository/url").text
                    if (self.ee_link.startswith("git://github.com/")):
                        scream.say('Is a GitHub project!')
                        self.is_github_project = True
                        self.github_repo_id = self.ee_link.split("git://github.com/")[1].split(".git")[0]
                        scream.say(self.github_repo_id)
                        self.repos_lists.append(self.github_repo_id)

            if not self.is_github_project:
                continue

            # now lets get even more sophisticated details
            self.params_detailed_url = urllib.urlencode({'api_key': return_random_openhub_key()})
            self.project_detailed_url = "https://www.openhub.net/projects/%s.xml?%s" % (self.project_id, self.params_detailed_url)  # how come here was a typo ?

            self.detailed_result_flow = urllib.urlopen(self.project_detailed_url)

            # Parse the response into a structured XML object
            self.detailed_tree = ET.parse(self.detailed_result_flow)

            # Did Ohloh return an error?
            self.detailed_elem = self.detailed_tree.getroot()
            self.detailed_error = self.detailed_elem.find("error")
            if self.detailed_error is not None:
                print 'Ohloh returned:', ET.tostring(self.detailed_error),
                sys.exit()

            self.twelve_month_contributor_count = self.detailed_elem.find("result/project/analysis/twelve_month_contributor_count").text
            self.total_contributor_count = self.detailed_elem.find("result/project/analysis/total_contributor_count").text
            self.twelve_month_commit_count = self.detailed_elem.find("result/project/analysis/twelve_month_commit_count")
            self.twelve_month_commit_count = self.twelve_month_commit_count.text if self.twelve_month_commit_count is not None else NullChar
            self.total_commit_count = self.detailed_elem.find("result/project/analysis/total_commit_count")
            self.total_commit_count = self.total_commit_count.text if self.total_commit_count is not None else NullChar
            self.total_code_lines = self.detailed_elem.find("result/project/analysis/total_code_lines")
            self.total_code_lines = self.total_code_lines.text if self.total_code_lines is not None else NullChar
            self.main_language_name = self.detailed_elem.find("result/project/analysis/main_language_name")
            self.main_language_name = self.main_language_name.text if self.main_language_name is not None else NullChar

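            # pick a GitHub client round-robin (by element index) so a single token's rate limit is not exhausted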
            self.current_ghc = github_clients[num_modulo(self.i-1)]
            self.current_ghc_desc = github_clients_ids[num_modulo(self.i-1)]

            print 'Now using github client id: ' + str(self.current_ghc_desc)

            for self.gh_entity in self.repos_lists:

                try:
                    self.repository = self.current_ghc.get_repo(self.gh_entity)
                    self.repo_name = self.repository.name
                    self.repo_full_name = self.repository.full_name
                    self.repo_html_url = self.repository.html_url
                    self.repo_stargazers_count = self.repository.stargazers_count
                    self.repo_forks_count = self.repository.forks_count
                    self.repo_created_at = self.repository.created_at
                    self.repo_is_fork = self.repository.fork
                    self.repo_has_issues = self.repository.has_issues
                    self.repo_open_issues_count = self.repository.open_issues_count
                    self.repo_has_wiki = self.repository.has_wiki
                    self.repo_network_count = self.repository.network_count
                    self.repo_pushed_at = self.repository.pushed_at
                    self.repo_size = self.repository.size
                    self.repo_updated_at = self.repository.updated_at
                    self.repo_watchers_count = self.repository.watchers_count

                    # Now its time to get the list of developers!

                    # yay! rec-09 mysql instance is visible from the yoshimune computer !
                    # ok, but I forgot github blacklisted our computing clusters
                    # make sure your local win machine runs it..
                    # just pjatk things.. carry on

                    scream.say('Retrieving the project id from mysql database.. should take max 1 second.')

                    # Get here project id used in the database !
                    #conn.ping(True)
                    self.cursor = conn.cursor()
                    self.cursor.execute(r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name))
                    self.rows = self.cursor.fetchall()

                    try:
                        self.repo_db_id = self.rows[0]
                    except:
                        #print str(cursor.info())
                        # this repo is too new (not found in the mysql db), skip it!
                        continue
                        #print 'Faulty query was: -------- '
                        #print r'select distinct id from (select * from projects where `name`="{0}") as p where url like "%{1}"'.format(self.repo_name, self.repo_full_name)

                    scream.say('project id retrieved from database is: ' + str(self.repo_db_id))

                    self.cursor.close()

                    #conn.ping(True)
                    self.cursor = conn.cursor()
                    # Now get list of GitHub logins which are project_members !
                    self.cursor.execute(r'SELECT login FROM project_members INNER JOIN users ON users.id = project_members.user_id WHERE repo_id = %s' % self.repo_db_id)
                    self.project_developers = self.cursor.fetchall()

                    self.project_developers = [i[0] for i in self.project_developers]  # unzipping tuples in tuples
                    self.contributors_count = len(self.project_developers)

                    self.cursor.close()
                    #conn.close()

                    for self.project_developer in self.project_developers:

                        # create a GitHub user named object for GitHub API
                        self.current_user = self.current_ghc.get_user(self.project_developer)

                        self.current_user_bio = self.current_user.bio
                        self.current_user_blog = self.current_user.blog
                        self.current_user_collaborators = self.current_user.collaborators
                        self.current_user_company = self.current_user.company
                        self.current_user_contributions = self.current_user.contributions
                        self.current_user_created_at = self.current_user.created_at
                        self.current_user_followers = self.current_user.followers
                        self.current_user_following = self.current_user.following

                        self.current_user_hireable = self.current_user.hireable
                        self.current_user_login = self.current_user.login
                        self.current_user_name = self.current_user.name

                        self.developer_login = self.project_developer

                        # Does he commit during business hours?
                        scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"PushEvent"):
                                        self.developer_all_pushes += self.usage_element['total']
                                    elif (self.usage_element['type'] == "WatchEvent"):
                                        self.developer_all_stars_given += self.usage_element['total']
                                    elif (self.usage_element['type'] == "CreateEvent"):
                                        self.developer_all_creations += self.usage_element['total']
                                    elif (self.usage_element['type'] == "IssuesEvent"):
                                        self.developer_all_issues_created += self.usage_element['total']
                                    elif (self.usage_element['type'] == "PullRequestEvent"):
                                        self.developer_all_pull_requests += self.usage_element['total']

                                # -----------------------------------------------------------------------
                                scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404')
                                scream.say('try ' + str(self.tries) + ' more times')
                                self.tries -= 1
                            finally:
                                if self.tries < 1:
                                    self.developer_works_during_bd = None
                                    self.developer_works_period = 0
                                    break

                        self.collection = [str(((page-1)*pagination) + self.i), self.gh_entity, self.repo_full_name, self.repo_html_url,
                                           str(self.repo_forks_count), str(self.repo_stargazers_count), str(self.contributors_count),
                                           str(self.repo_created_at), str(self.repo_is_fork), str(self.repo_has_issues), str(self.repo_open_issues_count),
                                           str(self.repo_has_wiki), str(self.repo_network_count), str(self.repo_pushed_at), str(self.repo_size),
                                           str(self.repo_updated_at), str(self.repo_watchers_count), self.project_id,
                                           self.project_name, self.project_url, self.project_htmlurl, str(self.project_created_at),
                                           str(self.project_updated_at), self.project_homepage_url, str(self.project_average_rating),
                                           str(self.project_rating_count), str(self.project_review_count), self.project_activity_level,
                                           str(self.project_user_count), str(self.twelve_month_contributor_count), str(self.total_contributor_count),
                                           str(self.twelve_month_commit_count), str(self.total_commit_count), str(self.total_code_lines),
                                           self.main_language_name, str(self.developer_works_during_bd), str(self.developer_works_period),
                                           str(self.developer_all_pushes), str(self.developer_all_stars_given), str(self.developer_all_creations),
                                           str(self.developer_all_issues_created), str(self.developer_all_pull_requests)]

                        csv_writer.writerow(self.collection)
                        #self.set_finished(True)
                        print '.'
                except UnknownObjectException:
                    print 'Repo ' + self.gh_entity + ' is not available anymore..'
                except GithubException:
                    # TODO: write here something clever
                    raise
        self.set_finished(True)
Example No. 33
    def analyze_with_selenium(self, repository):
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None
        while True:
            try:
                self.browser.set_page_load_timeout(15)
                self.browser.get(url)
                scream.say('Data from web retrieved')
                doc = html.document_fromstring(
                    unicode(self.browser.page_source))
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')

                scream.say(
                    'Verify if 404 (repo deleted) otherwise keep on going')
                parallax = doc.xpath('//div[@id="parallax_illustration"]')

                if (len(parallax) > 0):
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break

                scream.say('Verified that not 404')

                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath(
                    '//div[@class="blankslate has-fixed-width"]')

                if (len(repo_empty) > 0):
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break

                scream.say('Verified that repo not empty')

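                # the "numbers-summary" list carries the commits/branches/releases/contributors
                # counters; the "sunken-menu-group" side menu carries the Issues and Pull Requests counters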
                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')

                scream.say('XPath made some search for ' + url +
                           ' .. move on to bsoup..')
                scream.say('Xpath done searching')
                scream.say('Element found?: ' + str(len(ns) == 1))

                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(
                    etree.tostring(element_sunken))

                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")

                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(
                    commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(
                    branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(
                    releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)
                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(
                    contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)

                result['issues'] = 0
                result['pulls'] = 0

                for enumerable___ in enumarables_more:
                    if enumerable___["aria-label"] == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(
                            pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___["aria-label"] == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(
                            issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' +
                                   str(issues_number))
                        result['issues'] = parse_number(issues_number)

                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say(
                    'Scrambled results (TypeError). Maybe GitHub down. Retry')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retry')
                time.sleep(2.0)
                if force_raise:
                    raise

        assert 'status' in result
        return result
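
# --- Added sketch (not part of the original source) ---------------------------
# A minimal, self-contained illustration of the same scraping idea used above,
# without a browser: fetch the repository page, XPath the old GitHub layout's
# <ul class="numbers-summary"> counters and keep only their digits. It assumes
# the requests and lxml packages and that GitHub still serves that layout.
import re

import requests
from lxml import html as lxml_html


def sketch_repo_counters(repo_url):
    page = requests.get(repo_url).text
    doc_ = lxml_html.document_fromstring(page)
    texts = doc_.xpath('//ul[@class="numbers-summary"]//span[@class="num"]/text()')
    # e.g. u'\n 1,287 ' -> 1287, in the same order the counters appear on the page
    return [int(re.sub(r'\D', '', t)) for t in texts if re.sub(r'\D', '', t)]
# ------------------------------------------------------------------------------
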
Ejemplo n.º 34
0
     # OSRC was grumpy about the urllib2 even with headers attached
     # hdr = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7',
     #        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     #        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
     #        'Accept-Encoding': 'none',
     #        'Accept-Language': 'en-US,en;q=0.8',
     #        'Connection': 'keep-alive'}
     # req = urllib2.Request(osrc_url, headers=hdr)
     # response = urllib2.urlopen(req)
     # thus I moved to the requests library
     proxy = {'http': '94.154.26.132:8090'}
     session_osrc = requests.Session()
     requests_osrc = session_osrc.get(osrc_url, proxies=proxy)
     # print requests_osrc.text
     osrc_data = json.loads(requests_osrc.text)
     scream.say("JSON parsed..")
     if ("message" in osrc_data) and (osrc_data["message"].startswith("Not enough information for")):
         scream.say(osrc_data["message"])
         limit = limit + 1
         break
     for language in osrc_data["usage"]["languages"]:
         #print language["count"]
         #print language["language"]
         csv_writer.writerow([str(counter), str(developer_login), str(language["language"]), str(language["count"]), str(language["quantile"])])
     scream.log_debug("Languages diagram for user " + str(developer_login) + ' created..', True)
     # -----------------------------------------------------------------------
     scream.log_debug('Finished analyze OSRC card for user: '******'OSRC gave error, probably 404')
Ejemplo n.º 35
0
def descr_user(s):
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    #get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s +
                                      '?client_id=' + client_id +
                                      '&client_secret=' + client_secret)
            break
        except IOError:
            print 'API GitHub not responding, urlopen failed'
            print 'retrying after 1 minute'
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)
    #fullname = data['name']
    if 'name' in data:
        fullname = data['name']
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    if (len(fullname) > 0):
        first_name = unicode(fullname.split()[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down.' +
                                ' Waiting 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written

            control = my_browser.form.find_control("ctl00$TextBoxName")
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(
                    cyrillic2latin(first_name).encode('utf-8'))
            #check if value is enough
            #control.text = first_name
            scream.say('Control value is set to :' + str(control.value))
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError, e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems, or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. Waiting 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                except:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems, or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'Waiting 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
Ejemplo n.º 36
0
            are_working += 1
    return are_working


def num_modulo(thread_id_count__):
    global no_of_threads
    return thread_id_count__ % no_of_threads
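

# --- Added sketch (not part of the original source) ---------------------------
# The snippet above is truncated; a num_working() helper consistent with how it
# is called elsewhere (count the threads that have not flagged themselves as
# finished, cleaning up those that did) would look roughly like this.
# Hypothetical reconstruction, not the author's code.
def sketch_num_working(threads):
    are_working = 0
    for thread in threads:
        if getattr(thread, 'finished', False):
            thread.cleanup()
        else:
            are_working += 1
    return are_working
# ------------------------------------------------------------------------------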


if __name__ == "__main__":
    '''
    Starts the process of work on CSV files which are output of Google BigQuery
    whenever intelliGit.py is executed as a standalone program;
    the program reads through the input and gets all data about programmers
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo analyzer!')
    scream.say(version_name)

    secrets = []

    credential_list = []
    # reading the secrets; the GitHub factory objects will be created in the next block
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
Ejemplo n.º 37
0
    def analyze_with_splinter(self, repository):
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None

        while True:
            try:
                try:
                    self.splinter__browser.set_page_load_timeout(15)
                except:
                    scream.say('')
                
                try:
                    self.splinter__browser.ensure_success_response()
                except:
                    scream.say('')

                self.splinter__browser.visit(url)
                scream.say('Data from web retrieved')

                if splinter__driver == 'firefox':
                    doc = html.document_fromstring(unicode(self.splinter__browser.page_source))
                elif splinter__driver == 'chrome':
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                elif splinter__driver == 'phantomjs':
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))
                elif splinter__driver == 'zope.testbrowser':
                    #splinter__browser.set_handle_robots(False)
                    doc = html.document_fromstring(unicode(self.splinter__browser.html.decode('utf-8')))
                else:
                    assert False  # other browsers are not yet supported
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')

                #splinter__browser.screenshot(name=repository.key, suffix='.png')

                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                parallax = doc.xpath('//div[@id="parallax_illustration"]')

                if (len(parallax) > 0):
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break

                scream.say('Verified that not 404')

                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')

                if (len(repo_empty) > 0):
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break

                scream.say('Verified that repo not empty')

                if splinter__driver == 'phantomjs':
                    #WebDriverWait(splinter__browser, 10).until(waiter)
                    while True:
                        scream.say("Wait for the AJAX to do the magic")
                        if self.splinter__browser.is_element_not_present_by_xpath('//span[@class="octicon octicon-organization"]//..//..//text()[normalize-space(.)="Fetching contributors"]', wait_time=5):
                            break
                        else:
                            scream.say("AJAX didnt work on time")
                    doc = html.document_fromstring(unicode(self.splinter__browser.html))

                assert "Fetching contributors" not in doc

                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')

                scream.say('XPath made some search for ' + url + ' .. move on to bsoup..')
                scream.say('Xpath done searching')
                scream.say('Element found?: ' + str(len(ns) == 1))

                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))

                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")

                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)

                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)

                result['issues'] = 0
                result['pulls'] = 0

                for enumerable___ in enumarables_more:
                    if enumerable___["aria-label"] == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___["aria-label"] == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). Maybe GitHub down. Retry')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retry')
                time.sleep(2.0)
                if force_raise:
                    raise

        assert 'status' in result
        return result
Ejemplo n.º 38
0
#class MyHTTPErrorProcessor(urllib2.HTTPErrorProcessor):
#    def http_response(self, request, response):
#        code, msg, hdrs = response.code, response.msg, response.info()
#        # only add this line to stop 302 redirection.
#        print response
#        if code == 302: return response
#        if not (200 <= code < 300):
#            response = self.parent.error(
#             'http', request, response, code, msg, hdrs)
#         print response
#     return response
# https_response = http_response


if __name__ == "__main__":
    scream.say('Start main execution')
    scream.say(version_name)
    scream.say('Program warming up, this should take just seconds..')

    method = 'native'
    sites = None
    add_delimiter_info = False
    geckoname = 'PopularHYIP-gecko.htm'

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hm:s:i:vd", ["help", "method=", "sites=", "input=", "verbose", "delimiter"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
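
# --- Added sketch (not part of the original source) ---------------------------
# In the short-option string "hm:s:i:vd" the letters followed by ':' (m, s, i)
# take an argument, while h, v and d are plain flags. A typical way to consume
# the (option, value) pairs getopt returns is sketched below with assumed
# defaults mirroring the ones set above.
def sketch_apply_options(opts):
    settings = {'method': 'native', 'sites': None, 'verbose': False,
                'add_delimiter_info': False}
    for opt, val in opts:
        if opt in ('-m', '--method'):
            settings['method'] = val
        elif opt in ('-s', '--sites'):
            settings['sites'] = val
        elif opt in ('-v', '--verbose'):
            settings['verbose'] = True
        elif opt in ('-d', '--delimiter'):
            settings['add_delimiter_info'] = True
    return settings
# ------------------------------------------------------------------------------
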
Ejemplo n.º 39
0
 def initiate_splinter(self):
     scream.say('Initiating splinter...')
     self.splinter__browser = Browser(splinter__driver, wait_time=timeout)
     scream.say('Splinter ready for action')
Ejemplo n.º 40
0
    def analyze_with_selenium(self, repository):
        result = dict()
        scream.say('Starting webinterpret for ' + repository.html_url + '..')
        assert repository is not None
        url = repository.html_url
        assert url is not None
        while True:
            try:
                self.browser.set_page_load_timeout(15)
                self.browser.get(url)
                scream.say('Data from web retrieved')
                doc = html.document_fromstring(unicode(self.browser.page_source))
                scream.log_debug(str(url), True)
                scream.say('Continue to work on ' + url)
                scream.say('Page source sent further')

                scream.say('Verify if 404 (repo deleted) otherwise keep on going')
                parallax = doc.xpath('//div[@id="parallax_illustration"]')

                if (len(parallax) > 0):
                    scream.say('Verified that 404 (repo deleted)')
                    result['status'] = '404'
                    break

                scream.say('Verified that not 404')

                scream.say('Verify if repo empty otherwise keep on going')
                repo_empty = doc.xpath('//div[@class="blankslate has-fixed-width"]')

                if (len(repo_empty) > 0):
                    scream.say('Verified that repo is empty')
                    result['status'] = 'EMPTY'
                    break

                scream.say('Verified that repo not empty')

                ns = doc.xpath('//ul[@class="numbers-summary"]')
                sunken = doc.xpath('//ul[@class="sunken-menu-group"]')

                scream.say('XPath made some search for ' + url + ' .. move on to bsoup..')
                scream.say('Xpath done searching')
                scream.say('Element found?: ' + str(len(ns) == 1))

                element = ns[0]
                element_sunken = sunken[0]
                local_soup = BeautifulSoup(etree.tostring(element))
                local_soup_sunken = BeautifulSoup(etree.tostring(element_sunken))

                enumarables = local_soup.findAll("li")
                enumarables_more = local_soup_sunken.findAll("li")

                commits = enumarables[0]
                scream.say('enumarables[0]')
                commits_number = analyze_tag(commits.find("span", {"class": "num"}))
                scream.say('analyze_tag finished execution for commits_number')
                scream.say('Before parse number: ' + str(commits_number))
                result['commits'] = parse_number(commits_number)
                scream.log_debug(result['commits'], True)
                scream.say('enumarables[1]')
                branches = enumarables[1]
                branches_number = analyze_tag(branches.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(branches_number))
                result['branches'] = parse_number(branches_number)
                scream.log_debug(result['branches'], True)
                scream.say('enumarables[2]')
                releases = enumarables[2]
                releases_number = analyze_tag(releases.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(releases_number))
                result['releases'] = parse_number(releases_number)
                scream.log_debug(result['releases'], True)
                scream.say('enumarables[3]')
                contributors = enumarables[3]
                contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
                scream.say('Before parse number: ' + str(contributors_number))
                result['contributors'] = parse_number(contributors_number)
                scream.log_debug(result['contributors'], True)

                result['issues'] = 0
                result['pulls'] = 0

                for enumerable___ in enumarables_more:
                    if enumerable___["aria-label"] == "Pull Requests":
                        pulls_tag = enumerable___
                        pulls_number = analyze_tag(pulls_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(pulls_number))
                        result['pulls'] = parse_number(pulls_number)
                    elif enumerable___["aria-label"] == "Issues":
                        issues_tag = enumerable___
                        issues_number = analyze_tag(issues_tag.find("span", {"class": "counter"}))
                        scream.say('Before parse number: ' + str(issues_number))
                        result['issues'] = parse_number(issues_number)
                
                result['status'] = 'OK'
                break
            except TypeError as ot:
                scream.say(str(ot))
                scream.say('Scrambled results (TypeError). Maybe GitHub down. Retry')
                time.sleep(5.0)
                if force_raise:
                    raise
            except Exception as e:
                scream.say(str(e))
                scream.say('No response from selenium. Retry')
                time.sleep(2.0)
                if force_raise:
                    raise

        assert 'status' in result
        return result
Ejemplo n.º 41
0
def execute_check():
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose messaging ? [True/False]",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        scream.intelliTag_verbose = True
        scream.say("verbosity turned on")

    threads = []

    # init connection to database
    first_conn = MSQL.connect(host=IP_ADDRESS,
                              port=3306,
                              user=open('mysqlu.dat', 'r').read(),
                              passwd=open('mysqlp.dat', 'r').read(),
                              db="github",
                              connect_timeout=50000000,
                              charset='utf8',
                              init_command='SET NAMES UTF8',
                              use_unicode=True)
    print 'Testing MySql connection...'
    print 'Pinging database: ' + (str(first_conn.ping(True)) if
                                  first_conn.ping(True) is not None else 'NaN')
    cursor = first_conn.cursor()
    cursor.execute(
        r'SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    print 'There are: ' + str(
        rows[0][0]) + ' table objects in the local GHtorrent copy'
    cursor.execute(
        r'SELECT table_name FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    if (u'users', ) in rows and (u'projects', ) in rows:
        print 'All necessary tables are there.'
    else:
        print 'Your database does not fit a typical description of a GitHub Torrent copy..'
        sys.exit(0)

    sample_tb_name = raw_input(
        "Please enter table/view name (of chosen data sample): ")
    cursor.execute(r'select count(distinct name) from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    rows = cursor.fetchall()
    record_count = rows[0][0]
    cursor.close()

    scream.say(
        "Database seems to be working. Move on to getting list of users.")

    # populate list of users to memory
    cursor = first_conn.cursor()
    is_locked_tb = raw_input("Should I update [users] table instead of [" +
                             str(sample_tb_name) + "]? [y/n]: ")
    is_locked_tb = True if is_locked_tb in ['yes', 'y'] else False
    print 'Querying all names from the observations set.. This can take around 25-30 sec.'

    cursor.execute(r'select distinct name from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    # if you are interested in how this table was created, you will probably need to read our paper and contact us as well
    # because we have some more tables with aggregated data compared to standard GitHub Torrent collection
    row = cursor.fetchone()
    iterator = 1.0

    min_name_length = 2
    print 'We hypothesize that the minimum name length is ' \
        + str(min_name_length) + ' characters, like Ho, Sy, Lu'
    # http://www.answers.com/Q/What_is_the_shortest_name_in_the_world

    while row is not None:
        fullname = unicode(row[0])
        scream.log("\tFullname is: " + str(fullname.encode('unicode_escape')))
        iterator += 1
        print "[Progress]: " + str(
            (iterator / record_count) *
            100) + "% ----------- "  # [names] size: " + str(len(names))
        if len(fullname) < min_name_length:
            scream.log_warning(
                "--Found too short name field (" +
                str(fullname.encode('utf-8')) + ") from DB. Skipping..", True)
            row = cursor.fetchone()
            continue
        name = fullname.split()[0]
    # I find it quite uncommon to separate a name from a surname with anything other than a space;
    # it does occur, but it's not in my interest to detect such human-generated dirty data at the moment
        scream.log("\tName is: " + str(name.encode('unicode_escape')))
        if name in names:
            if fullname in names[name]['persons']:
                scream.say(
                    "\tSuch fullname already classified! Rare, but can happen. Move on."
                )
            else:
                scream.say(
                    "\tAdding fullname to already classified name. Move on")
                names[name]['persons'].append(fullname)
        else:
            scream.say("\tNew name. Lets start classification.")
            names[name] = {'persons': list(), 'classification': None}
            names[name]['persons'].append(fullname)
            scream.say("\tStart the worker on name: " +
                       str(name.encode('utf-8')) + " as deriven from: " +
                       str(fullname.encode('utf-8')))
            # start the worker
            gg = GeneralGetter(int(iterator), name)
            scream.say('Creating instance of GeneralGetter complete')
            scream.say('Appending thread to collection of threads')
            threads.append(gg)
            scream.say('Append complete, threads[] now have size: ' +
                       str(len(threads)))
            scream.log_debug(
                'Starting thread ' + str(int(iterator) - 1) + '....', True)
            gg.start()
            while (num_working(threads) > 3):
                time.sleep(0.2)  # sleep 200 ms while more than 3 threads are active
        row = cursor.fetchone()

    cursor.close()
    print "Finished getting gender data, moving to database update..."

    for key in names.keys():
        collection = names[key]
        gender = collection['classification']
        for fullname in names[key]['persons']:
            cursor = first_conn.cursor()
            update_query = r'UPDATE {2} SET gender = {0} where name = "{1}"'.format(
                gender,
                fullname.encode('utf-8').replace('"', '\\"'),
                'users' if is_locked_tb else sample_tb_name)
            print update_query
            cursor.execute(update_query)
            cursor.close()

    first_conn.close()
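
# --- Added sketch (not part of the original source) ---------------------------
# A safer variant of the UPDATE issued above: MySQLdb can bind the values itself
# via %s placeholders, which avoids the manual quote escaping. The table name
# still has to be interpolated, so it must come from a trusted source.
def sketch_update_gender(conn, table_name, gender, fullname):
    cur = conn.cursor()
    cur.execute('UPDATE ' + table_name + ' SET gender = %s WHERE name = %s',
                (gender, fullname))
    cur.close()
# ------------------------------------------------------------------------------
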
 def cleanup(self):
     scream.say('Marking thread on ' + self.repo.getKey() + ' as finished..')
     self.finished = True
Ejemplo n.º 43
0
                    time.sleep(60)
            local_soup = BeautifulSoup(html)
            failed = local_soup.find("span",
                                     {"id":
                                      "ctl00_ContentPlaceHolder1_" +
                                      "LabelFailedSearchedFor"})
            if failed is not None:
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
            gender_tag = local_soup.find("span",
                                         {"id":
                                         "ctl00_ContentPlaceHolder1_" +
                                         "LabelGenderFound"})
            if ((gender_tag is not None) and (gender_tag.contents is not None) and (len(gender_tag.contents) > 0)):
                gender = gender_tag.contents[0].string
                scream.say(gender)
                persist_users[s] = s + ',' + fullname.strip() + ',' + gender
                return s + ',' + fullname.strip() + ',' + gender
            else:
                scream.log_warning('Something is really wrong: the result page had ' +
                                   'neither a not-found label nor a proper result')
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
        else:
            persist_users[s] = s + ',' + fullname.strip()
            return s + ',' + fullname.strip()
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    global result_writer
    global result_punch_card_writer

    assert result_punch_card_writer is not None

    developer_login = contributor.login
    scream.log_debug(
        'Assigning a contributor: ' + str(developer_login) + ' to a repo: ' +
        str(repository.name), True)
    developer_name = contributor.name
    # 1. Number of people the given developer follows [FollowEvent]
    developer_followers = contributor.followers
    # 2. Number of people who follow the developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5.  Number of repos he did not create in which he is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    # 6.  Number of repos he did not create in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions

    # his_repositories - number of projects he created / owns
    # his_repositories = contributor.get_repos()

    # 17. Does he commit during working hours (depending on the time zone)?
    scream.log_debug(
        "Starting to analyze OSRC card for user: "******"Histogram for hours for user: " + str(developer_login) +
                ' created..', True)
            # count activity during business day
            count_bd__ = 0
            count_bd__ += sum(time_of_activity_per_hours[i]
                              for i in range(9, 18))
            # now count activity during non-business hours :)
            count_nwh__ = 0
            count_nwh__ += sum(time_of_activity_per_hours[i]
                               for i in range(0, 9))
            count_nwh__ += sum(time_of_activity_per_hours[i]
                               for i in range(18, 24))
            developer_works_during_bd = True if count_bd__ >= count_nwh__ else False
            scream.log_debug('Running C program...', True)
            args___ = ['./hist_block'] + [str(x) for x in time_of_activity_per_hours]
            developer_works_period = subprocess.Popen(
                args___, stdout=subprocess.PIPE).stdout.read()
            # -----------------------------------------------------------------------
            scream.log_debug(
                'Finished analyze OSRC card for user: '******'OSRC gave error, probably 404')
            scream.say('try ' + str(tries) + ' more times')
            tries -= 1
        finally:
            if tries < 1:
                developer_works_during_bd = 0
                developer_works_period = 0
                break

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable
    disk_usage = contributor.disk_usage

    public_gists = contributor.public_gists
    owned_private_repos = contributor.owned_private_repos
    total_private_repos = contributor.total_private_repos

    scream.log_debug(
        'Thread ' + str(thread_getter_instance) +
        ' Finished revealing contributor: ' + str(developer_login) +
        ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    if not use_utf8:
        result_writer.writerow([
            str(repo.getUrl()),
            str(repo.getName()),
            str(repo.getOwner()),
            str(repo.getStargazersCount()),
            str(repo.getWatchersCount()),
            str(repo.getCreatedAt()),
            str(repo.getDefaultBranch()),
            str(repo.getDescription()),
            str(repo.getIsFork()),
            str(repo.getForks()),
            str(repo.getForksCount()),
            str(repo.getHasDownloads()),
            str(repo.getHasWiki()),
            str(repo.getHasIssues()),
            str(repo.getLanguage()),
            str(repo.getMasterBranch()),
            str(repo.getNetworkCount()),
            str(repo.getOpenedIssues()),
            str(repo.getOrganization()),
            str(repo.getPushedAt()),
            str(repo.getUpdatedAt()),
            str(developer_login),
            str(developer_name if developer_name is not None else ''),
            str(developer_followers),
            str(developer_following),
            str(developer_collaborators),
            str(company if company is not None else ''),
            str(developer_contributions),
            str(created_at),
            str(hireable if hireable is not None else ''),
            str(developer_location if developer_location is not None else ''),
            str(developer_total_private_repos),
            str(developer_total_public_repos),
            str(developer_works_during_bd),
            str(developer_works_period),
            str(disk_usage),
            str(public_gists),
            str(owned_private_repos),
            str(total_private_repos)
        ])

    else:
        result_writer.writerow([
            repo.getUrl(),
            repo.getName(),
            repo.getOwner(),
            str(repo.getStargazersCount()),
            str(repo.getWatchersCount()),
            str(repo.getCreatedAt()),
            repo.getDefaultBranch(),
            repo.getDescription() if repo.getDescription() is not None else '',
            str(repo.getIsFork()),
            str(repo.getForks()),
            str(repo.getForksCount()),
            str(repo.getHasDownloads()),
            str(repo.getHasWiki()),
            str(repo.getHasIssues()),
            repo.getLanguage() if repo.getLanguage() is not None else '',
            repo.getMasterBranch()
            if repo.getMasterBranch() is not None else '',
            str(repo.getNetworkCount()),
            str(repo.getOpenedIssues()),
            repo.getOrganization()
            if repo.getOrganization() is not None else '',
            str(repo.getPushedAt()),
            str(repo.getUpdatedAt()), developer_login,
            developer_name if developer_name is not None else '',
            str(developer_followers),
            str(developer_following),
            str(developer_collaborators),
            company if company is not None else '',
            str(developer_contributions),
            str(created_at),
            str(hireable) if hireable is not None else '',
            developer_location if developer_location is not None else '',
            str(developer_total_private_repos),
            str(developer_total_public_repos),
            str(developer_works_during_bd),
            str(developer_works_period),
            str(disk_usage),
            str(public_gists),
            str(owned_private_repos),
            str(total_private_repos)
        ])

    scream.log_debug('Wrote row to CSV.', True)
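
# --- Added sketch (not part of the original source) ---------------------------
# The working-hours heuristic used above, extracted as a self-contained helper:
# given a 24-slot histogram of activity per hour, a developer counts as working
# business hours when the 9:00-18:00 slots hold at least as much activity as the
# remaining slots combined.
def sketch_works_business_hours(activity_per_hour):
    business = sum(activity_per_hour[9:18])
    off_hours = sum(activity_per_hour[:9]) + sum(activity_per_hour[18:24])
    return business >= off_hours
# ------------------------------------------------------------------------------
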
Ejemplo n.º 45
0
def descr_user(s):
    if s in persist_users:
        if persist_users[s] is None:
            return s
        else:
            return persist_users[s]
    #get user name and surname here
    while True:
        try:
            response = urllib.urlopen('https://api.github.com/users/' + s
                                      + '?client_id='
                                      + client_id + '&client_secret='
                                      + client_secret)
            break
        except IOError:
            print 'API GitHub not responding, urlopen failed'
            print 'retrying after 1 minute'
            time.sleep(60)
    scream.ssay(response)
    data = simplejson.load(response)
    scream.ssay(data)
    #fullname = data['name']
    if 'name' in data:
        fullname = data['name']
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
    if fullname is None:
        scream.say('Fullname provided but an empty entry')
        persist_users[s] = None
        return s
    if (len(fullname) > 0):
        first_name = unicode(fullname.split()[0])
        if (len(first_name) > 0):
            scream.say('#ask now internet for gender')
            while True:
                try:
                    response = my_browser.open('http://genderchecker.com/')
                    response.read()
                    break
                except urllib2.URLError:
                    scream.ssay('Site genderchecker.com seems to be down.' +
                                ' Waiting 60s before retry')
                    time.sleep(60)
            scream.say('Response read. Mechanize selecting form.')
            my_browser.select_form("aspnetForm")
            my_browser.form.set_all_readonly(False)
            # allow everything to be written

            control = my_browser.form.find_control("ctl00$TextBoxName")
            if only_roman_chars(first_name):
                control.value = StripNonAlpha(first_name.encode('utf-8'))
            else:
                control.value = StripNonAlpha(cyrillic2latin(first_name).encode('utf-8'))
            #check if value is enough
            #control.text = first_name
            scream.say('Control value is set to :' + str(control.value))
            submit_retry_counter = 4
            while True:
                try:
                    response = my_browser.submit()
                    html = response.read()
                    break
                except mechanize.HTTPError, e:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems, or my request is' +\
                                    ' wibbly-wobbly nonsense. HTTPError ' +\
                                    str(e.code) +\
                                    '. Waiting 60s before retry'
                    scream.say(error_message)
                    scream.log_error(str(e.code) + ': ' + error_message)
                    time.sleep(60)
                except:
                    submit_retry_counter -= 1
                    if submit_retry_counter < 1:
                        raise StopIteration
                    error_message = 'Site genderchecker.com seems to have ' +\
                                    'internal problems, or my request is' +\
                                    ' wibbly-wobbly nonsense. ' +\
                                    'Waiting 60s before retry'
                    scream.say(error_message)
                    scream.log_error(error_message)
                    time.sleep(60)
Ejemplo n.º 46
0
                    time.sleep(60)
            local_soup = BeautifulSoup(html)
            failed = local_soup.find("span",
                                     {"id":
                                      "ctl00_ContentPlaceHolder1_" +
                                      "LabelFailedSearchedFor"})
            if failed is not None:
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
            gender_tag = local_soup.find("span",
                                         {"id":
                                         "ctl00_ContentPlaceHolder1_" +
                                         "LabelGenderFound"})
            if ((gender_tag is not None) and (gender_tag.contents is not None) and (len(gender_tag.contents) > 0)):
                gender = gender_tag.contents[0].string
                scream.say(gender)
                persist_users[s] = s + ',' + fullname.strip() + ',' + gender
                return s + ',' + fullname.strip() + ',' + gender
            else:
                scream.log_warning('Something is really wrong: the result page had ' +
                                   'neither a not-found label nor a proper result')
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
        else:
            persist_users[s] = s + ',' + fullname.strip()
            return s + ',' + fullname.strip()
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s
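
# --- Added sketch (not part of the original source) ---------------------------
# The lookup descr_user() starts with, rewritten with the requests package as a
# hypothetical, self-contained helper: fetch the public profile of a GitHub
# login and read its "name" field; client_id/client_secret are the same OAuth
# application credentials the original appends as query parameters.
import requests


def sketch_fetch_fullname(login, client_id, client_secret):
    resp = requests.get('https://api.github.com/users/' + login,
                        params={'client_id': client_id,
                                'client_secret': client_secret})
    return resp.json().get('name')  # may be None when the user left it blank
# ------------------------------------------------------------------------------
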
Ejemplo n.º 47
0
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    global result_writer
    global use_splinter

    developer_login = contributor.login
    scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True)
    developer_name = contributor.name
    # 1 Number of people the given developer follows [FollowEvent]
    developer_followers = contributor.followers
    # 2 Number of people who follow the developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5.  Number of repos he did not create in which he is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    # 6.  Number of repos he did not create in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions

    # - Number of projects he created
    his_repositories = contributor.get_repos()

    while True:
        total_his_repositories = 0
        total_his_stars = 0
        total_his_watchers = 0
        total_his_forks = 0
        total_his_has_issues = 0
        total_his_has_wiki = 0
        total_his_open_issues = 0
        total_network_count = 0
        total_his_collaborators = 0
        total_his_contributors = 0

        if count___ == 'selenium':
            total_his_commits = 0
            total_his_branches = 0
            total_his_releases = 0
            total_his_issues = 0
            total_his_pull_requests = 0

        try:
            for his_repo in his_repositories:

                try:
                    total_his_repositories += 1
                    total_his_forks += his_repo.forks_count
                    total_his_stars += his_repo.stargazers_count
                    total_his_watchers += his_repo.watchers_count
                    total_his_has_issues += 1 if his_repo.has_issues else 0
                    total_his_has_wiki += 1 if his_repo.has_wiki else 0
                    total_his_open_issues += his_repo.open_issues
                    total_network_count += his_repo.network_count

                    if count___ == 'api':
                        # 3 Number of developers who are in the projects he created [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
                        total_his_contributors = None
                        while True:
                            try:
                                total_his_contributors = 0
                                #total_his_contributors = his_repo.get_contributors().totalCount -- this is buggy and will make errors
                                total_his_contributors += sum(1 for temp_object in his_repo.get_contributors())
                                break
                            except:
                                freeze('Exception in getting total_his_contributors')
                                if force_raise:
                                    raise
                        assert total_his_contributors is not None

                        # 4 Number of contributors who are in the projects he created
                        total_his_collaborators = None
                        while True:
                            try:
                                total_his_collaborators = 0
                                #total_his_collaborators = his_repo.get_collaborators().totalCount -- this is buggy and will make errors
                                total_his_collaborators += sum(1 for temp_object in his_repo.get_collaborators())
                                break
                            except:
                                freeze('Exception in getting total_his_collaborators')
                                if force_raise:
                                    raise
                        assert total_his_collaborators is not None
                    elif count___ == 'selenium':
                        scream.say('Using selenium for thread about  ' + str(developer_login) + ' \'s repositories')
                        if use_splinter:
                            result = thread_getter_instance.analyze_with_splinter(his_repo)
                        else:
                            result = thread_getter_instance.analyze_with_selenium(his_repo)  # pull the statistics via selenium and return them in an array:
                        # commits, branches, releases, contributors, issues, pull requests
                        if result['status'] == '404':
                            continue
                        if result['status'] == 'EMPTY':
                            continue
                        total_his_commits += result['commits']
                        total_his_branches += result['branches']
                        total_his_releases += result['releases']
                        total_his_issues += result['issues']
                        total_his_pull_requests += result['pulls']
                        total_his_contributors += result['contributors']
                    else:  # hence it is only when not selenium is used
                        while True:
                            try:
                                his_contributors = set()
                                stats = his_repo.get_stats_contributors()
                                assert stats is not None
                                for stat in stats:
                                    if str(stat.author.login).strip() in ['None', '']:
                                        continue
                                    his_contributors.add(stat.author.login)
                                total_his_contributors += len(his_contributors)
                                break
                            except Exception as exc:
                                scream.log_warning('Not ready data while revealing details.. ' +
                                                   ', error({0})'.format(str(exc)), True)
                                freeze('StatsContribution not ready.. waiting for the server to provide good data')
                                if force_raise:
                                    raise
                except GithubException as e:
                    freeze(str(e) + ' in try per repo of x-dev repos')
                    if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                        scream.log_debug("It is a private repo.. Skip!")
                        continue
                    if force_raise:
                        raise
            break
        except Exception as e:
            freeze(str(e) + ' in main loop of developer_revealed()')
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable

    scream.log_debug('Thread ' + str(thread_getter_instance.threadId) +
                     ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    if not use_utf8:
        result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()),
                               str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(developer_login),
                               (str(developer_name) if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), (str(company) if company is not None else ''), str(developer_contributions),
                               str(created_at), (str(hireable) if hireable is not None else ''),
                               str(total_his_repositories), str(total_his_stars), str(total_his_collaborators), str(total_his_contributors),
                               str(total_his_watchers), str(total_his_forks), str(total_his_has_issues),
                               str(total_his_has_wiki), str(total_his_open_issues), str(total_network_count),
                               (str(developer_location) if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(total_his_issues), str(total_his_pull_requests)])
    else:
        result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()), developer_login,
                               (developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), (company if company is not None else ''), str(developer_contributions),
                               str(created_at), (str(hireable) if hireable is not None else ''),
                               str(total_his_repositories), str(total_his_stars), str(total_his_collaborators), str(total_his_contributors),
                               str(total_his_watchers), str(total_his_forks), str(total_his_has_issues),
                               str(total_his_has_wiki), str(total_his_open_issues), str(total_network_count),
                               (developer_location if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(total_his_issues), str(total_his_pull_requests)])

    scream.log_debug('Wrote row to CSV.', True)
 def cleanup(self):
     scream.say('Marking thread on ' + self.repo.getKey() +
                ' as finished..')
     self.finished = True
Ejemplo n.º 49
0
    time.sleep(sleepy_head_time)
    limit = gh.get_rate_limit()
    while limit.rate.remaining < 15:
        time.sleep(sleepy_head_time)


def freeze_more():
    freeze()


if __name__ == "__main__":
    '''
    Starts the process of work on CSV files which are output of Google BigQuery
    whenever intelli_git.py is executed as a standalone program
    '''
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl GitHub repo getter!')
    scream.say(version_name)

    secrets = []
    credential_list = []
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
                client_id__ = str(secrets[2]).strip()
                client_secret__ = str(secrets[3]).strip()
        float(s)  # for int, long and float
    except ValueError:
        try:
            complex(s)  # for complex
        except ValueError:
            return False
    return True


def analyze_tag(tag):
    number = filter(lambda x: x.isdigit(), str(tag))
    return number
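

# --- Added sketch (not part of the original source) ---------------------------
# A small, hypothetical demo of analyze_tag(): under Python 2, filter() over a
# str returns a str, so a rendered counter collapses to its bare digits, which
# parse_number() elsewhere in this project then converts to an int.
def sketch_analyze_tag_demo():
    sample_tag = '<span class="num"> 1,287 </span>'  # assumed markup
    print analyze_tag(sample_tag)  # -> '1287'
# ------------------------------------------------------------------------------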


if __name__ == "__main__":
    scream.say('Start main execution')
    scream.say('Welcome to WikiTeams.pl small dataset enricher!')

    skip_full_lists = True
    method = 'bs'
    add_delimiter_info = False
    use_selenium = True
    resume = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hm:s:r:vd", ["help", "method=", "selenium=", "resume=", "verbose", "delimiter"])
    except getopt.GetoptError as err:
        # print help information and exit:
        print str(err)  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
            thread.cleanup()
    return are_working


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-tags", help="type of software [tags] you wish to parse from openhub", type=str)
    parser.add_argument("-r", "--resume", help="resume parse ? [True/False]", action="store_true")
    parser.add_argument("-resume_point", help="resume point (ordinal_id)", type=int)
    parser.add_argument("-fa", "--force_append", help="force appending results to CSV instead of overwrite", action="store_true")
    parser.add_argument("-v", "--verbose", help="verbose messaging ? [True/False]", action="store_true")
    parser.add_argument("-s", "--excel", help="add excel sepinfo at the beginning ? [True/False]", action="store_true")
    args = parser.parse_args()
    if args.verbose:
        scream.intelliTag_verbose = True
        scream.say("verbosity turned on")
    if args.tags:
        openhub_query_tags = args.tags.split(',')
        print 'Tags used to query open hub will be: ' + str(openhub_query_tags)
    if args.force_append:
        force_csv_append = True
    if args.excel:
        force_add_excelsep = True
    if args.resume_point:
        print 'Resume repo id is: ' + str(args.resume_point)

    assert len(openhub_query_tags) < 2 # I couldn't find a way to query openhub for multiple tags..

    first_conn = MSQL.connect(host="10.4.4.3", port=3306, user=open('mysqlu.dat', 'r').read(),
                              passwd=open('mysqlp.dat', 'r').read(), db="github", connect_timeout=50000000)
    print 'Testing mySql connection...'

def freeze():
    sleepy_head_time = 60 * 60
    time.sleep(sleepy_head_time)
    limit = gh.get_rate_limit()
    while limit.rate.remaining < 15:
        time.sleep(sleepy_head_time)


def freeze_more():
    freeze()


if __name__ == "__main__":
    scream.say('Start main execution')
    scream.say(version_name)

    secrets = []
    credential_list = []
    with open('pass.txt', 'r') as passfile:
        line__id = 0
        for line in passfile:
            line__id += 1
            secrets.append(line)
            if line__id % 4 == 0:
                login_or_token__ = str(secrets[0]).strip()
                pass_string = str(secrets[1]).strip()
                client_id__ = str(secrets[2]).strip()
                client_secret__ = str(secrets[3]).strip()
                credential_list.append({'login' : login_or_token__ , 'pass' : pass_string , 'client_id' : client_id__ , 'client_secret' : client_secret__})
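
# --- Added sketch (not part of the original source) ---------------------------
# A hypothetical continuation, assuming the PyGithub package: each 4-line block
# of pass.txt yields one credential dict above, and one Github client could be
# built per credential so the crawler can rotate API rate limits between them.
from github import Github


def sketch_build_clients(credential_list):
    return [Github(login_or_token=c['login'], password=c['pass'],
                   client_id=c['client_id'], client_secret=c['client_secret'])
            for c in credential_list]
# ------------------------------------------------------------------------------
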
Ejemplo n.º 53
0
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    global result_writer
    global result_punch_card_writer

    assert result_punch_card_writer is not None

    developer_login = contributor.login
    scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True)
    developer_name = contributor.name
    # 1. Number of people the given developer follows [FollowEvent]
    developer_followers = contributor.followers
    # 2. Number of people who follow the developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5a. Number of repos in which the developer is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    scream.say('Developer collaborators count is: ' + str(developer_collaborators))
    # 6a. Number of repos in which the developer is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions
    scream.say('Developer contributions count is: ' + str(developer_contributions))

    # his_repositories - number of projects the developer created / owns
    his_repositories = contributor.get_repos()

    # 17. Does the developer commit during working hours (depending on the time zone)?
    scream.log_debug("Starting to analyze OSRC card for user: " + str(developer_login), True)
    # NOTE: the OSRC-card analysis that followed here was redacted in the extracted snippet.
    # Judging from the surrounding code it logged "Histogram for hours for user: ...",
    # derived developer_works_during_bd and developer_works_period, initialized the
    # counters used below (his_contributors as a set, total_his_collaborators,
    # self_collaborating, self_contributing, total_his_issues, total_his_pull_requests,
    # trying_to_get_stats) and opened the "for his_repo in his_repositories" loop
    # ("pod repozytorium" - per subrepository) that the following code belongs to.
                        while True:
                            try:
                                trying_to_get_stats += 1
                                stats = his_repo.get_stats_contributors()
                                status_code__ = get_status_code('https://api.github.com/repos/' + his_repo.full_name + '/stats/contributors') 
                                if status_code__ != 204:
                                    for s in stats:
                                        ad___c = 0
                                        ad___a = 0
                                        ad___d = 0
                                        for w in s.weeks:
                                            ad___c += w.c
                                            ad___a += w.a
                                            ad___d += w.d
                                        if s.author.login not in his_contributors:
                                            his_contributors.add(s.author.login)
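                                        # each punch-card row: subrepo owner, subrepo name, the analyzed
                                        # developer, the statistics author, their total commit count, and the
                                        # commits/additions/deletions summed over s.weeks (w.c / w.a / w.d)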
                                        result_punch_card_writer.writerow([str(his_repo.owner.login), str(his_repo.name),
                                                                          str(developer_login), str(s.author.login), str(s.total), str(ad___c), str(ad___a), str(ad___d)])
                                else:
                                    scream.log_debug('The subrepo is empty, thus no statistics (punchcard) generated this time', True)
                                break
                            except GithubException as e:
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Unexpected error with getting stats.')
                                if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                                    scream.log_debug("It is a private repo.. Skip!", True)
                                    break
                                if force_raise:
                                    raise
                            except TypeError as e:
                                scream.log_warning('This was stats attempt no: ' + str(trying_to_get_stats), True)
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise
                            except Exception as e:
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise

                        # 6. Number of repos the developer did not create in which they are a developer
                        if developer_login in his_contributors:
                            self_contributing += 1

                        # 5. Number of repos the developer did not create in which they are a team member
                        subrepo_collaborators = his_repo.get_collaborators()
                        for collaborator in subrepo_collaborators:
                            total_his_collaborators += 1
                            if developer_login == collaborator.login:
                                self_collaborating += 1

                        # All elements paginated through his_repositories, thus we can safely break loop
                        break
                    except GithubException as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories')
                        if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                            scream.log_debug("It is a private repo.. Skip!")
                            continue
                        if force_raise:
                            raise
                    except TypeError as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise
                    except Exception as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise

            total_his_contributors = len(his_contributors)

            # 5.  Number of repos the developer did not create in which they are a team member [TeamAddEvent] [MemberEvent]
            # here it is enough to subtract from developer_collaborators the occurrences of this login in subrepo.get_collaborators()
            developer_foreign_collaborators = (developer_collaborators if developer_collaborators is not None else 0) - self_collaborating
            # 6.  Number of repos the developer did not create in which they are a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
            # here it is enough to subtract from developer_contributions the occurrences of this login in subrepo.get_contributions()
            developer_foreign_contributions = developer_contributions - self_contributing
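            # e.g. if the profile reports developer_collaborators == 10 and the login was found as a
            # collaborator in 3 of the developer's own repos (self_collaborating == 3), then
            # developer_foreign_collaborators == 7; the same subtraction is applied to contributions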

            # All properties checked for a dev, thus we can safely break loop
            break

        except Exception as e:
            freeze('Error ' + str(e) + ' in for his_repo in his_repositories loop. Will start the subrepo analysis from the beginning.')
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable
    disk_usage = contributor.disk_usage

    public_gists = contributor.public_gists
    owned_private_repos = contributor.owned_private_repos
    total_private_repos = contributor.total_private_repos

    scream.log_debug('Thread ' + str(thread_getter_instance) +
                     ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    if not use_utf8:
        result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()),
                               str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), str(repo.getDefaultBranch()), str(repo.getDescription()),
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               str(repo.getLanguage()), str(repo.getMasterBranch()), str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               str(repo.getOrganization()), str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               str(developer_login),
                               str(developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), str(company if company is not None else ''), str(developer_contributions),
                               str(created_at), str(hireable if hireable is not None else ''),
                               str(developer_location if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    else:
        result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), repo.getDefaultBranch(), repo.getDescription() if repo.getDescription() is not None else '',
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               repo.getLanguage() if repo.getLanguage() is not None else '',
                               repo.getMasterBranch() if repo.getMasterBranch() is not None else '',
                               str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               repo.getOrganization() if repo.getOrganization() is not None else '',
                               str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               developer_login,
                               developer_name if developer_name is not None else '', str(developer_followers), str(developer_following),
                               str(developer_collaborators), company if company is not None else '', str(developer_contributions),
                               str(created_at), str(hireable) if hireable is not None else '',
                               developer_location if developer_location is not None else '',
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    scream.log_debug('Wrote row to CSV.', True)