Ejemplo n.º 1
0
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Re-download the HTML page and search for the tag again if it is missing.

    When ``gotten_tag`` is not None it is returned untouched.  Otherwise the
    page is fetched again up to three times, sleeping 60, 180 and 540 seconds
    before the successive attempts, and searched for a ``tagname`` element
    whose ``objectname`` attribute equals ``arg_objectname``.

    The function reads ``html_addr``, ``filename``, ``html_content_file`` and
    ``scream`` from the enclosing scope; none of them is a parameter.

    Returns:
        The tag that was passed in or found by a retry, or None when the tag
        is still missing after all three retries.
    """
    how_long = 60
    if gotten_tag is None:
        # retry 3 times, tripling the waiting time before every attempt
        for _attempt in range(0, 3):
            time.sleep(how_long)
            how_long *= 3

            # keep trying until the download itself succeeds
            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = ('Socket error while retrieving HTML'
                                         ' file from GitHub! Internet or '
                                         'GitHub down? Retry after 1 minute')
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)

            # NOTE(review): html_content_file is not reassigned here, so the
            # soup is built from whatever the enclosing scope last loaded -
            # confirm that it reflects the file downloaded just above.
            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # Leave the retry loop and return the tag below.  The former
                # `raise StopIteration` escaped the function as an exception
                # instead of ending the loop.
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned' + filename + '.json')
            scream.log_error(
                filename + '.json' + 'is without proper html. ' +
                'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say(
        'No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
Ejemplo n.º 2
0
def retry_if_neccessary(gotten_tag, tagname, objectname, arg_objectname):
    """Fetch the HTML page again when the expected tag was not found.

    A ``gotten_tag`` that is not None is returned as is.  Otherwise up to
    three retries are made; the waits before them are 60, 180 and 540
    seconds.  Each retry downloads the page and searches it for a ``tagname``
    element whose ``objectname`` attribute equals ``arg_objectname``.

    ``html_addr``, ``filename``, ``html_content_file`` and ``scream`` are
    taken from the enclosing scope, not from the parameters.

    Returns:
        The tag (given or found on retry), or None if every retry failed to
        find it.
    """
    how_long = 60
    if gotten_tag is None:
        # retry 3 times with a tripled pause before each attempt
        for _attempt in range(0, 3):
            time.sleep(how_long)
            how_long *= 3

            # repeat the download until no socket error occurs
            while True:
                try:
                    local_filename_html, headers_html = urllib.urlretrieve(
                        html_addr, filename + '.html')
                    break
                except IOError:
                    io_socket_message = ('Socket error while retrieving HTML'
                                         ' file from GitHub! Internet or '
                                         'GitHub down? Retry after 1 minute')
                    scream.ssay(io_socket_message)
                    scream.log_warning(io_socket_message)
                    time.sleep(60)

            # NOTE(review): html_content_file is never refreshed inside this
            # function - verify it points at the freshly downloaded page.
            soup = BeautifulSoup(html_content_file)
            gotten_tag = soup.find(tagname, {objectname: arg_objectname})
            if gotten_tag is not None:
                # `break` ends the retries and lets the function return the
                # tag; `raise StopIteration` used to propagate to the caller.
                break
        if gotten_tag is None:
            # nothing to do here, lets move on
            scream.ssay('orphaned' + filename + '.json')
            scream.log_error(filename + '.json' + 'is without proper html. ' +
                             'GitHub not responding or giving 404/501 erorr ??')
            return None
    scream.say('No action required. retry_if_neccessary() returning gotten_tag')
    return gotten_tag
Ejemplo n.º 3
0
def slice_queue(queue, begin_arg, end_arg, percentage=False):
    """Trim a deque in place by popping items from both of its ends.

    Args:
        queue: object supporting ``len()``, ``popleft()`` and ``pop()``.
        begin_arg: left boundary; ``begin_arg - 1`` items are popped from the
            left end.
        end_arg: right boundary; ``len(queue) - end_arg - 1`` items are
            popped from the right end (length taken before any popping).
        percentage: when true, both boundaries are first multiplied by the
            queue length and truncated to integers.

    Returns:
        None; ``queue`` is modified in place.
    """
    scream.log_warning("Slicing the dequeue, percentage=" + str(percentage), True)
    if percentage:
        # percentage threshold
        # Truncate the product directly: the former int(str(...)) round trip
        # raised ValueError whenever the product was a float (int('12.5')).
        begin_arg = int(len(queue) * begin_arg)
        end_arg = int(len(queue) * end_arg)
    # computed before popping, so it refers to the original length
    right_index = len(queue) - end_arg
    # NOTE(review): both loops start at 1, so each side pops one item fewer
    # than its boundary value - confirm this is the intended indexing.
    for _ in xrange(1, begin_arg):
        queue.popleft()
    for _ in xrange(1, right_index):
        queue.pop()
Ejemplo n.º 4
0
def execute_check():
    """Classify names from a sample table and write genders back to MySQL.

    Steps, in order:
      1. parse the ``-v/--verbose`` command line switch;
      2. connect to the ``github`` database using credentials read from
         ``mysqlu.dat`` / ``mysqlp.dat`` and verify that the ``users`` and
         ``projects`` tables exist (exits the process otherwise);
      3. ask for a sample table name and read every distinct ``name`` whose
         ``gender`` is NULL;
      4. start one ``GeneralGetter`` worker per new first name, keeping the
         number of working threads reported by ``num_working`` at 3 or below
         before moving on;
      5. update the ``gender`` column of ``users`` or of the sample table
         with the classification stored in the module-level ``names`` dict.

    Relies on module-level names not defined here: ``MSQL``, ``IP_ADDRESS``,
    ``scream``, ``names``, ``GeneralGetter`` and ``num_working``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose messaging ? [True/False]",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        scream.intelliTag_verbose = True
        scream.say("verbosity turned on")

    threads = []

    # Read the credentials through context managers so that the file handles
    # are closed instead of being left to the garbage collector.
    # NOTE(review): the content is passed on verbatim, including any trailing
    # newline stored in the files.
    with open('mysqlu.dat', 'r') as user_file:
        db_user = user_file.read()
    with open('mysqlp.dat', 'r') as password_file:
        db_password = password_file.read()

    # init connection to database
    first_conn = MSQL.connect(host=IP_ADDRESS,
                              port=3306,
                              user=db_user,
                              passwd=db_password,
                              db="github",
                              connect_timeout=50000000,
                              charset='utf8',
                              init_command='SET NAMES UTF8',
                              use_unicode=True)
    print('Testing MySql connection...')
    # ping once and reuse the result (it used to be issued twice)
    ping_result = first_conn.ping(True)
    print('Pinging database: ' +
          (str(ping_result) if ping_result is not None else 'NaN'))
    cursor = first_conn.cursor()
    cursor.execute(
        r'SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    print('There are: ' + str(rows[0][0]) +
          ' table objects in the local GHtorrent copy')
    cursor.execute(
        r'SELECT table_name FROM information_schema.tables WHERE table_schema = "%s"'
        % 'github')
    rows = cursor.fetchall()
    # Both tables have to be tested explicitly: `(a) and (b) in rows` only
    # tested the second one because a non-empty tuple is always true.
    if (u'users', ) in rows and (u'projects', ) in rows:
        print('All neccesary tables are there.')
    else:
        print('Your database does not fit a typical description of a GitHub Torrent copy..')
        sys.exit(0)

    # NOTE(review): the table name typed by the operator is concatenated into
    # the SQL text below (identifiers cannot be bound as parameters).
    sample_tb_name = raw_input(
        "Please enter table/view name (of chosen data sample): ")
    cursor.execute(r'select count(distinct name) from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    rows = cursor.fetchall()
    record_count = rows[0][0]
    cursor.close()

    scream.say(
        "Database seems to be working. Move on to getting list of users.")

    # populate list of users to memory
    cursor = first_conn.cursor()
    is_locked_tb = raw_input("Should I update [users] table instead of [" +
                             str(sample_tb_name) + "]? [y/n]: ")
    is_locked_tb = True if is_locked_tb in ['yes', 'y'] else False
    print('Querying all names from the observations set.. This can take around 25-30 sec.')

    cursor.execute(r'select distinct name from ' + str(sample_tb_name) +
                   ' where ((name is not NULL) and (gender is NULL))')
    # if you are interested in how this table was created, you will probably need to read our paper and contact us as well
    # because we have some more tables with aggregated data compared to standard GitHub Torrent collection
    row = cursor.fetchone()
    # float, so that the progress division below is not an integer division
    iterator = 1.0

    min_name_length = 2
    print('We hypothetize that minimum name length are ' +
          str(min_name_length) + ' characters, like Ho, Sy, Lu')
    # http://www.answers.com/Q/What_is_the_shortest_name_in_the_world

    while row is not None:
        fullname = unicode(row[0])
        scream.log("\tFullname is: " + str(fullname.encode('unicode_escape')))
        iterator += 1
        print("[Progress]: " + str((iterator / record_count) * 100) +
              "% ----------- ")  # [names] size: " + str(len(names))
        if len(fullname) < min_name_length:
            scream.log_warning(
                "--Found too short name field (" +
                str(fullname.encode('utf-8')) + ") from DB. Skipping..", True)
            row = cursor.fetchone()
            continue
        name = fullname.split()[0]
        # I find it quite uncommon to seperate name from surname with something else than a space
        # it does occur, but it's not in my interest to detect such human-generated dirty data at the moment
        scream.log("\tName is: " + str(name.encode('unicode_escape')))
        if name in names:
            if fullname in names[name]['persons']:
                scream.say(
                    "\tSuch fullname already classified! Rare, but can happen. Move on."
                )
            else:
                scream.say(
                    "\tAdding fullname to already classified name. Move on")
                names[name]['persons'].append(fullname)
        else:
            scream.say("\tNew name. Lets start classification.")
            names[name] = {'persons': list(), 'classification': None}
            names[name]['persons'].append(fullname)
            scream.say("\tStart the worker on name: " +
                       str(name.encode('utf-8')) + " as deriven from: " +
                       str(fullname.encode('utf-8')))
            # start the worker
            gg = GeneralGetter(int(iterator), name)
            scream.say('Creating instance of GeneralGetter complete')
            scream.say('Appending thread to collection of threads')
            threads.append(gg)
            scream.say('Append complete, threads[] now have size: ' +
                       str(len(threads)))
            scream.log_debug(
                'Starting thread ' + str(int(iterator) - 1) + '....', True)
            gg.start()
            while (num_working(threads) > 3):
                time.sleep(
                    0.2
                )  # sleeping for 200 ms - there are already 3 active threads..
        row = cursor.fetchone()

    cursor.close()

    # Let the workers that are still running finish; otherwise the update
    # below can read classifications that have not been filled in yet.
    # NOTE(review): assumes num_working() drops to 0 once every worker is done.
    while num_working(threads) > 0:
        time.sleep(0.2)

    print("Finished getting gender data, moving to database update...")

    target_table = 'users' if is_locked_tb else sample_tb_name
    # Values are bound by the driver instead of being pasted into the SQL
    # text: the manual escaping only handled double quotes, and an unset
    # classification was rendered as the bare word None.
    update_query = 'UPDATE ' + str(target_table) + \
        ' SET gender = %s where name = %s'
    for key in names.keys():
        collection = names[key]
        gender = collection['classification']
        for fullname in collection['persons']:
            cursor = first_conn.cursor()
            print(update_query + ' ' + repr((gender, fullname)))
            cursor.execute(update_query, (gender, fullname))
            cursor.close()

    first_conn.close()
Ejemplo n.º 5
0
            })
            if failed is not None:
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
            gender_tag = local_soup.find(
                "span",
                {"id": "ctl00_ContentPlaceHolder1_" + "LabelGenderFound"})
            if ((gender_tag is not None) and (gender_tag.contents is not None)
                    and (len(gender_tag.contents) > 0)):
                gender = gender_tag.contents[0].string
                scream.say(gender)
                persist_users[s] = s + ',' + fullname.strip() + ',' + gender
                return s + ',' + fullname.strip() + ',' + gender
            else:
                scream.log_warning(
                    'Something really wrong, on result page there ' +
                    'was no not-found label neither a proper result')
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
        else:
            persist_users[s] = s + ',' + fullname.strip()
            return s + ',' + fullname.strip()
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s


# This method tries to fetch the HTML discussion page again
# when BeautifulSoup has a problem finding
# the proper HTML tags which always indicate the author
         branches = enumarables[1]
         branches_number = analyze_tag(branches.find("span", {"class": "num"}))
         print branches_number
         releases = enumarables[2]
         releases_number = analyze_tag(releases.find("span", {"class": "num"}))
         print releases_number
         contributors = enumarables[3]
         contributors_number = analyze_tag(contributors.find("span", {"class": "num"}))
         print contributors_number
         break
     except TypeError as ot:
         if use_selenium:
             output_nonexistent_writer.writerow(row)
             break
             # i already implemented selenium, so this won't have appliations anymore i believe :)
         scream.log_warning("TypeError error({0})".format(ot))
         scream.say(contributors)
         scream.say('Timeout: 5s')
         time.sleep(5)
         scream.say('Will try again... GitHub didnt manage to fetch on time?')
     except:
         if use_selenium:
             output_nonexistent_writer.writerow(row)
             break
     finally:
         scream.say('Moving on to writing row and next record...')
 row.append(commits_number)
 row.append(branches_number)
 row.append(releases_number)
 row.append(contributors_number)
 moredata_writer.writerow(row)
Ejemplo n.º 7
0
                if not ((resume_on_repo_name == repo.getName()) and
                        (resume_on_repo_owner == repo.getOwner())):
                    iteration_step_count += 1
                    continue
                else:
                    resume_on_repo = None
                    iteration_step_count += 1
                    continue

            try:
                repository = github_client.get_repo(repo.getKey())
                repo.setRepoObject(repository)
            except UnknownObjectException as e:
                scream.log_warning('Repo with key + ' + key +
                                   ' not found, error({0}): {1}'.
                                   format(e.status, e.data))
                repos_reported_nonexist.write(key + os.linesep)
                continue
            except:
                scream.log_warning('Repo with key + ' + key +
                                   ' not found, error({0}): {1}'.
                                   format(e.status, e.data))
                repos_reported_nonexist.write(key + os.linesep)
                continue

            iteration_step_count += 1
            scream.ssay('Step no ' + str(iteration_step_count) +
                        '. Working on a repo: ' + key)

            if resume_stage in [None, 'contributors']:
         assert repo.getWatchersCount() is not None
         # from this line move everything to a thread!
         scream.say('Create instance of GeneralGetter with ID ' + str(thread_id_count) + ' and token ' + str(current_ghc_desc))
         scream.log_debug('Make GeneralGetter object', True)
         gg = GeneralGetter(thread_id_count, repository, repo, current_ghc)
         scream.say('Creating instance of GeneralGetter complete')
         scream.say('Appending thread to collection of threads')
         threads.append(gg)
         scream.say('Append complete, threads[] now have size: ' + str(len(threads)))
         thread_id_count += 1
         scream.log_debug('Starting thread ' + str(thread_id_count-1) + '....', True)
         gg.start()
         break
 except UnknownObjectException as e:
     scream.log_warning('Repo with key + ' + key +
                        ' not found, error({0}): {1}'.
                        format(e.status, e.data), True)
     repos_reported_nonexist.write(key + os.linesep)
     continue
 except GithubException as e:
     scream.log_warning('Repo with key + ' + key +
                        ' made exception in API, error({0}): {1}'.
                        format(e.status, e.data), True)
     repos_reported_execution_error.write(key + os.linesep)
     freeze(str(e) + ' in the main loop (most top try-catch)')
     scream.say('Trying again with repo ' + str(key))
     if show_trace:
         scream.log_debug('Printing traceback stack', True)
         traceback.print_stack()
         scream.log_debug('Printing traceback exc pathway', True)
         traceback.print_exc()
Ejemplo n.º 9
0
def freeze(message):
    """Log a warning with the reason, then sleep for ``sleepy_head_time`` seconds.

    ``sleepy_head_time`` is a module-level value that is only read here.
    """
    notice = ''.join([
        'Sleeping for ',
        str(sleepy_head_time),
        ' seconds. Reason: ',
        str(message),
    ])
    scream.log_warning(notice, True)
    time.sleep(sleepy_head_time)
Ejemplo n.º 10
0
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    """Collect statistics about one contributor and append them as a CSV row.

    The contributor's own attributes are read first, then all repositories
    returned by ``contributor.get_repos()`` are walked and per-repository
    figures are summed up.  How contributors are counted depends on the
    module-level ``count___`` switch: ``'api'`` iterates the contributor and
    collaborator listings, ``'selenium'`` delegates to
    ``thread_getter_instance.analyze_with_selenium`` and any other value uses
    ``get_stats_contributors()``.  On an exception the code calls
    ``freeze()`` and retries, unless the module-level ``force_raise`` is set,
    in which case the exception is re-raised.

    Args:
        thread_getter_instance: worker object; its ``threadId`` is logged and
            its ``analyze_with_selenium`` is used in 'selenium' mode.
        repository: object whose ``name`` is used in log messages.
        repo: object providing ``getUrl``, ``getName``, ``getOwner``,
            ``getStargazersCount`` and ``getWatchersCount`` for the row.
        contributor: object describing the developer (presumably a PyGithub
            user object - confirm against the caller).

    Returns:
        None; one row is written through the module-level ``result_writer``.
    """
    global result_writer

    developer_login = contributor.login
    scream.log_debug(
        'Assigning a contributor: ' + str(developer_login) + ' to a repo: ' +
        str(repository.name), True)
    developer_name = contributor.name
    # 1 Value of the contributor's `followers` attribute [FollowEvent]
    developer_followers = contributor.followers
    # 2 Value of the contributor's `following` attribute [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5. Number of repos he did not create in which he is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    # 6. Number of repos he did not create in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions

    # - Projects created by him
    his_repositories = contributor.get_repos()

    while True:
        # All totals are reset here so that a restart of the whole walk
        # (see the outer except below) does not count anything twice.
        total_his_repositories = 0
        total_his_stars = 0
        total_his_watchers = 0
        total_his_forks = 0
        total_his_has_issues = 0
        total_his_has_wiki = 0
        total_his_open_issues = 0
        total_network_count = 0
        total_his_collaborators = 0
        total_his_contributors = 0

        # Initialised in every mode: issues and pull requests are written to
        # the CSV row unconditionally, so leaving them undefined outside of
        # 'selenium' mode ended in UnboundLocalError.
        total_his_commits = 0
        total_his_branches = 0
        total_his_releases = 0
        total_his_issues = 0
        total_his_pull_requests = 0

        try:
            for his_repo in his_repositories:

                try:
                    total_his_repositories += 1
                    total_his_forks += his_repo.forks_count
                    total_his_stars += his_repo.stargazers_count
                    total_his_watchers += his_repo.watchers_count
                    total_his_has_issues += 1 if his_repo.has_issues else 0
                    total_his_has_wiki += 1 if his_repo.has_wiki else 0
                    total_his_open_issues += his_repo.open_issues
                    total_network_count += his_repo.network_count

                    if count___ == 'api':
                        # 3 Number of developers who are in projects created by him [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
                        # Count into a temporary and add it only on success:
                        # the total then accumulates over all repositories
                        # (it used to be reset for each one) and a retry
                        # cannot count a repository twice.
                        while True:
                            try:
                                # his_repo.get_contributors().totalCount -- this is buggy and will make errors
                                repo_contributors = sum(
                                    1 for temp_object in
                                    his_repo.get_contributors())
                                break
                            except Exception:
                                freeze(
                                    'Exception in getting total_his_contributors'
                                )
                                if force_raise:
                                    raise
                        total_his_contributors += repo_contributors

                        # 4 Number of contributors who are in projects created by him
                        while True:
                            try:
                                # his_repo.get_collaborators().totalCount -- this is buggy and will make errors
                                repo_collaborators = sum(
                                    1 for temp_object in
                                    his_repo.get_collaborators())
                                break
                            except Exception:
                                freeze(
                                    'Exception in getting total_his_collaborators'
                                )
                                if force_raise:
                                    raise
                        total_his_collaborators += repo_collaborators
                    elif count___ == 'selenium':
                        scream.say('Using selenium for thread about  ' +
                                   str(developer_login) + ' \'s repositories')
                        # extract the statistics through selenium and get them back in a mapping:
                        # commits, branches, releases, contributors, issues, pull requests
                        result = thread_getter_instance.analyze_with_selenium(
                            his_repo)
                        if result['status'] == '404':
                            continue
                        if result['status'] == 'EMPTY':
                            continue
                        # NOTE(review): commits, branches and releases are
                        # summed but never written to the CSV row below.
                        total_his_commits += result['commits']
                        total_his_branches += result['branches']
                        total_his_releases += result['releases']
                        total_his_issues += result['issues']
                        total_his_pull_requests += result['pulls']
                        total_his_contributors += result['contributors']
                    else:  # hence it is only when not selenium is used
                        while True:
                            try:
                                his_contributors = set()
                                stats = his_repo.get_stats_contributors()
                                assert stats is not None
                                for stat in stats:
                                    if str(stat.author.login).strip() in [
                                            'None', ''
                                    ]:
                                        continue
                                    his_contributors.add(stat.author.login)
                                total_his_contributors += len(his_contributors)
                                break
                            except Exception as exc:
                                scream.log_warning(
                                    'Not ready data while revealing details.. '
                                    + ', error({0})'.format(str(exc)), True)
                                freeze(
                                    'StatsContribution not ready.. waiting for the server to provide good data'
                                )
                                if force_raise:
                                    raise
                except GithubException as e:
                    freeze(str(e) + ' in try per repo of x-dev repos')
                    if ("message"
                            in e.data) and (e.data["message"].strip()
                                            == "Repository access blocked"):
                        scream.log_debug("It is a private repo.. Skip!")
                        continue
                    if force_raise:
                        raise
            break
        except Exception as e:
            freeze(str(e) + ' in main loop of developer_revealed()')
            # ask for the repository listing again before the next attempt
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable

    scream.log_debug(
        'Thread ' + str(thread_getter_instance.threadId) +
        ' Finished revealing contributor: ' + str(developer_login) +
        ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    # Free-text fields are passed through unchanged when use_utf8 is set and
    # converted with str() otherwise; every other field is always str().
    if use_utf8:
        as_text = lambda value: value
    else:
        as_text = str

    result_writer.writerow([
        as_text(repo.getUrl()),
        as_text(repo.getName()),
        as_text(repo.getOwner()),
        str(repo.getStargazersCount()),
        str(repo.getWatchersCount()),
        as_text(developer_login),
        (as_text(developer_name) if developer_name is not None else ''),
        str(developer_followers),
        str(developer_following),
        str(developer_collaborators),
        (as_text(company) if company is not None else ''),
        str(developer_contributions),
        str(created_at),
        (str(hireable) if hireable is not None else ''),
        str(total_his_repositories),
        str(total_his_stars),
        str(total_his_collaborators),
        str(total_his_contributors),
        str(total_his_watchers),
        str(total_his_forks),
        str(total_his_has_issues),
        str(total_his_has_wiki),
        str(total_his_open_issues),
        str(total_network_count),
        (as_text(developer_location)
         if developer_location is not None else ''),
        str(developer_total_private_repos),
        str(developer_total_public_repos),
        str(total_his_issues),
        str(total_his_pull_requests)
    ])

    scream.log_debug('Wrote row to CSV.', True)
Ejemplo n.º 11
0
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    global result_writer
    global use_splinter

    developer_login = contributor.login
    scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True)
    developer_name = contributor.name
    # 1 Ilosc osob, ktore dany deweloper followuje [FollowEvent]
    developer_followers = contributor.followers
    # 2 Ilosc osob, ktore followuja dewelopera [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5.  Ilosc repo, ktorych nie tworzyl, w ktorych jest team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    # 6.  Ilosc repo, ktorych nie tworzyl, w ktorych jest contributorem [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions

    # - Ilosc projektow przez niego utworzonych
    his_repositories = contributor.get_repos()

    while True:
        total_his_repositories = 0
        total_his_stars = 0
        total_his_watchers = 0
        total_his_forks = 0
        total_his_has_issues = 0
        total_his_has_wiki = 0
        total_his_open_issues = 0
        total_network_count = 0
        total_his_collaborators = 0
        total_his_contributors = 0

        if count___ == 'selenium':
            total_his_commits = 0
            total_his_branches = 0
            total_his_releases = 0
            total_his_issues = 0
            total_his_pull_requests = 0

        try:
            for his_repo in his_repositories:

                try:
                    total_his_repositories += 1
                    total_his_forks += his_repo.forks_count
                    total_his_stars += his_repo.stargazers_count
                    total_his_watchers += his_repo.watchers_count
                    total_his_has_issues += 1 if his_repo.has_issues else 0
                    total_his_has_wiki += 1 if his_repo.has_wiki else 0
                    total_his_open_issues += his_repo.open_issues
                    total_network_count += his_repo.network_count

                    if count___ == 'api':
                        # 3 Ilosc deweloperow, ktorzy sa w projektach przez niego utworzonych [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
                        total_his_contributors = None
                        while True:
                            try:
                                total_his_contributors = 0
                                #total_his_contributors = his_repo.get_contributors().totalCount -- this is buggy and will make errors
                                total_his_contributors += sum(1 for temp_object in his_repo.get_contributors())
                                break
                            except:
                                freeze('Exception in getting total_his_contributors')
                                if force_raise:
                                    raise
                        assert total_his_contributors is not None

                        # 4 Ilosc kontrybutorow, ktorzy sa w projektach przez niego utworzonych
                        total_his_collaborators = None
                        while True:
                            try:
                                total_his_collaborators = 0
                                #total_his_collaborators = his_repo.get_collaborators().totalCount -- this is buggy and will make errors
                                total_his_collaborators += sum(1 for temp_object in his_repo.get_collaborators())
                                break
                            except:
                                freeze('Exception in getting total_his_collaborators')
                                if force_raise:
                                    raise
                        assert total_his_collaborators is not None
                    elif count___ == 'selenium':
                        scream.say('Using selenium for thread about  ' + str(developer_login) + ' \'s repositories')
                        if use_splinter:
                            result = thread_getter_instance.analyze_with_splinter(his_repo)
                        else:
                            result = thread_getter_instance.analyze_with_selenium(his_repo)  # wyciagnij statystyki przez selenium, i zwroc w tablicy:
                        # commits, branches, releases, contributors, issues, pull requests
                        if result['status'] == '404':
                            continue
                        if result['status'] == 'EMPTY':
                            continue
                        total_his_commits += result['commits']
                        total_his_branches += result['branches']
                        total_his_releases += result['releases']
                        total_his_issues += result['issues']
                        total_his_pull_requests += result['pulls']
                        total_his_contributors += result['contributors']
                    else:  # hence it is only when not selenium is used
                        while True:
                            try:
                                his_contributors = set()
                                stats = his_repo.get_stats_contributors()
                                assert stats is not None
                                for stat in stats:
                                    if str(stat.author.login).strip() in ['None', '']:
                                        continue
                                    his_contributors.add(stat.author.login)
                                total_his_contributors += len(his_contributors)
                                break
                            except Exception as exc:
                                scream.log_warning('Not ready data while revealing details.. ' +
                                                   ', error({0})'.format(str(exc)), True)
                                freeze('StatsContribution not ready.. waiting for the server to provide good data')
                                if force_raise:
                                    raise
                except GithubException as e:
                    freeze(str(e) + ' in try per repo of x-dev repos')
                    if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                        scream.log_debug("It is a private repo.. Skip!")
                        continue
                    if force_raise:
                        raise
            break
        except Exception as e:
            freeze(str(e) + ' in main loop of developer_revealed()')
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable

    scream.log_debug('Thread ' + str(thread_getter_instance.threadId) +
                     ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True)

    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    if not use_utf8:
        result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()),
                               str(repo.getStargazersCount()), str(repo.getWatchersCount()), str(developer_login),
                               (str(developer_name) if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), (str(company) if company is not None else ''), str(developer_contributions),
                               str(created_at), (str(hireable) if hireable is not None else ''),
                               str(total_his_repositories), str(total_his_stars), str(total_his_collaborators), str(total_his_contributors),
                               str(total_his_watchers), str(total_his_forks), str(total_his_has_issues),
                               str(total_his_has_wiki), str(total_his_open_issues), str(total_network_count),
                               (str(developer_location) if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(total_his_issues), str(total_his_pull_requests)])
    else:
        result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()), developer_login,
                               (developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), (company if company is not None else ''), str(developer_contributions),
                               str(created_at), (str(hireable) if hireable is not None else ''),
                               str(total_his_repositories), str(total_his_stars), str(total_his_collaborators), str(total_his_contributors),
                               str(total_his_watchers), str(total_his_forks), str(total_his_has_issues),
                               str(total_his_has_wiki), str(total_his_open_issues), str(total_network_count),
                               (developer_location if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(total_his_issues), str(total_his_pull_requests)])

    scream.log_debug('Wrote row to CSV.', True)
Ejemplo n.º 12
0
def _csv_writer_for(output_csvfile):
    """Return the row writer selected by the module-level ``use_utf8`` flag."""
    if use_utf8:
        return UnicodeWriter(output_csvfile)
    return csv.writer(output_csvfile, dialect=MyDialect)


def _or_blank(value):
    """Return ``value`` unchanged, or '' when it is None."""
    return value if value is not None else ''


def _str_or_blank(value):
    """Return ``str(value)``, or '' when ``value`` is None."""
    return str(value) if value is not None else ''


def _login_or_blank(user):
    """Return ``user.login``, or '' when ``user`` itself is None."""
    return user.login if user is not None else ''


def _single_line(text):
    """Join the lines of ``text`` with single spaces; '' when it is None."""
    return ' '.join(text.splitlines()) if text is not None else ''


def _repo_row(repo):
    """Build the repos.csv row for ``repo`` after sanity-checking its counters."""
    rn = repo.getRepoObject()
    rfc = repo.getForksCount()
    rwc = repo.getWatchersCount()
    rcc = repo.getContributorsCount()
    rsc = repo.getSubscribersCount()
    rstc = repo.getStargazersCount()
    rlc = repo.getLabelsCount()
    rcmc = repo.getCommitsCount()
    rpc = repo.getPullsCount()
    # Forks/watchers are expected as digit strings, the other counters as ints.
    assert rfc.isdigit()
    assert rwc.isdigit()
    assert type(rcc) == int
    assert type(rsc) == int
    assert type(rstc) == int
    assert type(rlc) == int
    assert type(rcmc) == int
    assert type(rpc) == int

    return (repo.getName(),
            repo.getOwner(),
            str(rfc),
            str(rwc),
            str(rcc),
            str(rsc),
            str(rstc),
            str(rlc),
            str(rcmc),
            str(rpc),
            _or_blank(rn.archive_url),
            _or_blank(rn.assignees_url),
            _or_blank(rn.blobs_url),
            _or_blank(rn.branches_url),
            _or_blank(rn.clone_url),
            _or_blank(rn.collaborators_url),
            _or_blank(rn.comments_url),
            _or_blank(rn.commits_url),
            _or_blank(rn.compare_url),
            _or_blank(rn.contents_url),
            _or_blank(rn.contributors_url),
            _str_or_blank(rn.created_at),
            _or_blank(rn.default_branch),
            _or_blank(rn.description),
            _or_blank(rn.events_url),
            _str_or_blank(rn.fork),
            _or_blank(rn.full_name),
            _or_blank(rn.git_commits_url),
            _or_blank(rn.git_refs_url),
            _or_blank(rn.git_tags_url),
            _str_or_blank(rn.has_downloads),
            _str_or_blank(rn.has_wiki),
            _or_blank(rn.master_branch))


def _commit_row(repo, commit):
    """Build the commits.csv row for one commit of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            commit.sha,
            _login_or_blank(commit.author),
            _login_or_blank(commit.committer),
            commit.url,
            commit.html_url,
            commit.comments_url)


def _subscriber_row(repo, subscriber):
    """Build the subscribers.csv row for one user of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            _or_blank(subscriber.login),
            _or_blank(subscriber.bio),
            _or_blank(subscriber.blog),
            str(subscriber.collaborators),
            _or_blank(subscriber.company),
            str(subscriber.contributions),
            str(subscriber.followers),
            str(subscriber.following))


def _issue_row(repo, issue):
    """Build the issues.csv row for one issue of ``repo``."""
    assert (type(issue.id) == int or issue.id is None)
    assert (type(issue.number) == int or issue.number is None)
    return (repo.getName(),
            repo.getOwner(),
            _login_or_blank(issue.assignee),
            _single_line(issue.body),
            # Stringified like every other timestamp written by this module,
            # so the row holds only text whichever writer is in use.
            _str_or_blank(issue.closed_at),
            _login_or_blank(issue.closed_by),
            str(issue.id),
            str(issue.number),
            _or_blank(issue.title))


def _pull_row(repo, pull):
    """Build the pulls.csv row for one pull request of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            str(pull.additions),
            _login_or_blank(pull.assignee),
            _single_line(pull.body),
            str(pull.changed_files),
            _str_or_blank(pull.closed_at),
            str(pull.comments),
            _or_blank(pull.comments_url),
            _str_or_blank(pull.created_at),
            str(pull.deletions),
            _or_blank(pull.diff_url),
            _or_blank(pull.html_url),
            str(pull.id),
            _or_blank(pull.issue_url),
            _or_blank(pull.merge_commit_sha),
            str(pull.mergeable),
            _or_blank(pull.mergeable_state),
            str(pull.merged),
            _str_or_blank(pull.merged_at),
            str(pull.number),
            _or_blank(pull.patch_url),
            _or_blank(pull.review_comment_url),
            str(pull.review_comments),
            _or_blank(pull.review_comments_url),
            _or_blank(pull.state),
            _single_line(pull.title),
            _str_or_blank(pull.updated_at),
            _login_or_blank(pull.user))


def output_data(repo):
    """Append everything gathered for ``repo`` to the per-entity CSV files.

    Files are opened in binary append mode ('ab') in the current working
    directory, in this order: repos.csv, contributors.csv, commits.csv,
    languages.csv, subscribers.csv, labels.csv, issues.csv, pulls.csv.
    The writer class depends on the module-level ``use_utf8`` flag.

    Args:
        repo: repository wrapper exposing the ``get*()`` accessors used below
            (``getRepoObject``, ``getName``, ``getOwner``, the ``get*Count``
            getters and the collection getters).

    Raises:
        AssertionError: when a counter or an issue id/number has an
            unexpected type.
    """
    with open('repos.csv', 'ab') as output_csvfile:
        scream.ssay('repos.csv opened for append..')
        repowriter = _csv_writer_for(output_csvfile)
        repowriter.writerow(_repo_row(repo))

    with open('contributors.csv', 'ab') as output_csvfile:
        scream.ssay('contributors.csv opened for append..')
        contribwriter = _csv_writer_for(output_csvfile)
        for contributor in repo.getContributors():
            contribwriter.writerow((repo.getName(),
                                    repo.getOwner(),
                                    contributor.login))

    with open('commits.csv', 'ab') as output_csvfile:
        scream.ssay('commits.csv opened for append..')
        commitswriter = _csv_writer_for(output_csvfile)
        for commit in repo.getCommits():
            commitswriter.writerow(_commit_row(repo, commit))

    # The getter must be *called*: the bound method object is never None,
    # which used to make the warning branch unreachable and let a None
    # result blow up in the loop.
    if repo.getLanguages() is not None:
        with open('languages.csv', 'ab') as output_csvfile:
            scream.ssay('languages.csv opened for append..')
            langwriter = _csv_writer_for(output_csvfile)
            for language in repo.getLanguages():
                langwriter.writerow((repo.getName(),
                                     repo.getOwner(),
                                     language))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no languages[]')

    # NOTE(review): subscribers.csv is filled from getContributors(); this
    # looks like it should use a subscribers getter - confirm against
    # the repository wrapper class before changing.
    if repo.getContributors() is not None:
        with open('subscribers.csv', 'ab') as output_csvfile:
            scream.ssay('subscribers.csv opened for append..')
            subscriberswriter = _csv_writer_for(output_csvfile)
            for subscriber in repo.getContributors():
                subscriberswriter.writerow(_subscriber_row(repo, subscriber))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no subscribers[]')

    if repo.getLabels() is not None:
        with open('labels.csv', 'ab') as output_csvfile:
            scream.ssay('labels.csv opened for append..')
            labelswriter = _csv_writer_for(output_csvfile)
            for label in repo.getLabels():
                labelswriter.writerow((repo.getName(),
                                       repo.getOwner(),
                                       label.name,
                                       label.color))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no labels[]')

    if repo.getIssues() is not None:
        with open('issues.csv', 'ab') as output_csvfile:
            scream.ssay('issues.csv opened for append..')
            issueswriter = _csv_writer_for(output_csvfile)
            for issue in repo.getIssues():
                issueswriter.writerow(_issue_row(repo, issue))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no issues[]')

    if repo.getPulls() is not None:
        with open('pulls.csv', 'ab') as output_csvfile:
            scream.ssay('pulls.csv opened for append..')
            pullswriter = _csv_writer_for(output_csvfile)
            for pull in repo.getPulls():
                pullswriter.writerow(_pull_row(repo, pull))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no pulls[]')
Ejemplo n.º 13
0
    for filename in file_names:
        scream.say('------ WORKING WITH FILE : ' + filename)
        filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
        filename__ = filename_ + filename + '.csv'
        with open(filename__, 'rb') as source_csvfile:
            reposReader = csv.reader(source_csvfile,
                                     delimiter=',')
            reposReader.next()
            for row in reposReader:
                scream.log('Processing row: ' + str(row))
                name = row[0]
                owner = row[1]

                #here eleminate repos without owner, rly
                if len(owner.strip()) < 1:
                    scream.log_warning('Skipping orphan repo: ' + name)
                    continue
                    #print 'length < 1'

                '12. Liczba Fork'
                forks = row[2]
                watchers = row[3]
                key = owner + '/' + name
                scream.log('Key built: ' + key)

                repo = MyRepository()
                repo.setKey(key)
                repo.setInitials(name, owner, watchers, forks)

                #check here if repo dont exist already in dictionary!
                if key in repos:
Ejemplo n.º 14
0
                if not ((resume_on_repo_name == repo.getName()) and
                        (resume_on_repo_owner == repo.getOwner())):
                    iteration_step_count += 1
                    continue
                else:
                    resume_on_repo = None
                    iteration_step_count += 1
                    continue

            try:
                repository = github_client.get_repo(repo.getKey())
                repo.setRepoObject(repository)
            except UnknownObjectException as e:
                scream.log_warning(
                    'Repo with key + ' + key +
                    ' not found, error({0}): {1}'.format(e.status, e.data))
                repos_reported_nonexist.write(key + os.linesep)
                continue
            except:
                scream.log_warning(
                    'Repo with key + ' + key +
                    ' not found, error({0}): {1}'.format(e.status, e.data))
                repos_reported_nonexist.write(key + os.linesep)
                continue

            iteration_step_count += 1
            scream.ssay('Step no ' + str(iteration_step_count) +
                        '. Working on a repo: ' + key)

            if resume_stage in [None, 'contributors']:
Ejemplo n.º 15
0
                                      "ctl00_ContentPlaceHolder1_" +
                                      "LabelFailedSearchedFor"})
            if failed is not None:
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
            gender_tag = local_soup.find("span",
                                         {"id":
                                         "ctl00_ContentPlaceHolder1_" +
                                         "LabelGenderFound"})
            if ((gender_tag is not None) and (gender_tag.contents is not None) and (len(gender_tag.contents) > 0)):
                gender = gender_tag.contents[0].string
                scream.say(gender)
                persist_users[s] = s + ',' + fullname.strip() + ',' + gender
                return s + ',' + fullname.strip() + ',' + gender
            else:
                scream.log_warning('Something really wrong, on result page there ' +
                                   'was no not-found label neither a proper result')
                persist_users[s] = s + ',' + fullname.strip()
                return s + ',' + fullname.strip()
        else:
            persist_users[s] = s + ',' + fullname.strip()
            return s + ',' + fullname.strip()
    else:
        scream.say('Fullname not provided')
        persist_users[s] = None
        return s


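# This method tries to fetch the HTML discussion page again and again
# when BeautifulSoup (BS) has a problem finding the proper HTML tags,
# which always indicate the author or the discussion content - content
# that must exist.
# NOTE(review): this text does not describe freeze() below, which only
# logs a warning and sleeps; it looks left over from a retry helper - confirm.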
def freeze(message):
    """Log why we are pausing, then sleep for ``sleepy_head_time`` seconds.

    Args:
        message: reason for the pause; converted with ``str()`` for the log.
    """
    # sleepy_head_time is a module-level setting that is only read here.
    notice = ''.join(['Sleeping for ', str(sleepy_head_time),
                      ' seconds. Reason: ', str(message)])
    scream.log_warning(notice, True)
    time.sleep(sleepy_head_time)
Ejemplo n.º 17
0
    for filename in file_names:
        scream.say('------ WORKING WITH FILE : ' + filename)
        filename_ = 'data/' if sys.platform == 'linux2' else 'data\\'
        filename__ = filename_ + filename + '.csv'
        with open(filename__, 'rb') as source_csvfile:
            reposReader = csv.reader(source_csvfile, delimiter=',')
            reposReader.next()
            for row in reposReader:
                scream.log('Processing row: ' + str(row))
                name = row[0]
                owner = row[1]

                #here eleminate repos without owner, rly
                if len(owner.strip()) < 1:
                    scream.log_warning('Skipping orphan repo: ' + name)
                    continue
                    #print 'length < 1'

                '12. Liczba Fork'
                forks = row[2]
                watchers = row[3]
                key = owner + '/' + name
                scream.log('Key built: ' + key)

                repo = MyRepository()
                repo.setKey(key)
                repo.setInitials(name, owner, watchers, forks)

                #check here if repo dont exist already in dictionary!
                if key in repos:
         gg = GeneralGetter(thread_id_count, repository, repo,
                            current_ghc)
         scream.say('Creating instance of GeneralGetter complete')
         scream.say('Appending thread to collection of threads')
         threads.append(gg)
         scream.say('Append complete, threads[] now have size: ' +
                    str(len(threads)))
         thread_id_count += 1
         scream.log_debug(
             'Starting thread ' + str(thread_id_count - 1) + '....',
             True)
         gg.start()
         break
 except UnknownObjectException as e:
     scream.log_warning(
         'Repo with key + ' + key +
         ' not found, error({0}): {1}'.format(e.status, e.data),
         True)
     repos_reported_nonexist.write(key + os.linesep)
     continue
 except GithubException as e:
     scream.log_warning(
         'Repo with key + ' + key +
         ' made exception in API, error({0}): {1}'.format(
             e.status, e.data), True)
     if ("message" in e.data) and (e.data["message"].strip()
                                   == "Repository access blocked"):
         scream.log_debug("It is now a private repo.. Skip!", True)
         continue
     repos_reported_execution_error.write(key + os.linesep)
     freeze(str(e) + ' in the main loop (most top try-catch)')
     scream.say('Trying again with repo ' + str(key))
Ejemplo n.º 19
0
def _csv_writer_for(output_csvfile):
    """Return the row writer selected by the module-level ``use_utf8`` flag."""
    if use_utf8:
        return UnicodeWriter(output_csvfile)
    return csv.writer(output_csvfile, dialect=MyDialect)


def _or_blank(value):
    """Return ``value`` unchanged, or '' when it is None."""
    return value if value is not None else ''


def _str_or_blank(value):
    """Return ``str(value)``, or '' when ``value`` is None."""
    return str(value) if value is not None else ''


def _login_or_blank(user):
    """Return ``user.login``, or '' when ``user`` itself is None."""
    return user.login if user is not None else ''


def _single_line(text):
    """Join the lines of ``text`` with single spaces; '' when it is None."""
    return ' '.join(text.splitlines()) if text is not None else ''


def _repo_row(repo):
    """Build the repos.csv row for ``repo`` after sanity-checking its counters."""
    rn = repo.getRepoObject()
    rfc = repo.getForksCount()
    rwc = repo.getWatchersCount()
    rcc = repo.getContributorsCount()
    rsc = repo.getSubscribersCount()
    rstc = repo.getStargazersCount()
    rlc = repo.getLabelsCount()
    rcmc = repo.getCommitsCount()
    rpc = repo.getPullsCount()
    # Forks/watchers are expected as digit strings, the other counters as ints.
    assert rfc.isdigit()
    assert rwc.isdigit()
    assert type(rcc) == int
    assert type(rsc) == int
    assert type(rstc) == int
    assert type(rlc) == int
    assert type(rcmc) == int
    assert type(rpc) == int

    return (repo.getName(),
            repo.getOwner(),
            str(rfc),
            str(rwc),
            str(rcc),
            str(rsc),
            str(rstc),
            str(rlc),
            str(rcmc),
            str(rpc),
            _or_blank(rn.archive_url),
            _or_blank(rn.assignees_url),
            _or_blank(rn.blobs_url),
            _or_blank(rn.branches_url),
            _or_blank(rn.clone_url),
            _or_blank(rn.collaborators_url),
            _or_blank(rn.comments_url),
            _or_blank(rn.commits_url),
            _or_blank(rn.compare_url),
            _or_blank(rn.contents_url),
            _or_blank(rn.contributors_url),
            _str_or_blank(rn.created_at),
            _or_blank(rn.default_branch),
            _or_blank(rn.description),
            _or_blank(rn.events_url),
            _str_or_blank(rn.fork),
            _or_blank(rn.full_name),
            _or_blank(rn.git_commits_url),
            _or_blank(rn.git_refs_url),
            _or_blank(rn.git_tags_url),
            _str_or_blank(rn.has_downloads),
            _str_or_blank(rn.has_wiki),
            _or_blank(rn.master_branch))


def _commit_row(repo, commit):
    """Build the commits.csv row for one commit of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            commit.sha,
            _login_or_blank(commit.author),
            _login_or_blank(commit.committer),
            commit.url,
            commit.html_url,
            commit.comments_url)


def _subscriber_row(repo, subscriber):
    """Build the subscribers.csv row for one user of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            _or_blank(subscriber.login),
            _or_blank(subscriber.bio),
            _or_blank(subscriber.blog),
            str(subscriber.collaborators),
            _or_blank(subscriber.company),
            str(subscriber.contributions),
            str(subscriber.followers),
            str(subscriber.following))


def _issue_row(repo, issue):
    """Build the issues.csv row for one issue of ``repo``."""
    assert (type(issue.id) == int or issue.id is None)
    assert (type(issue.number) == int or issue.number is None)
    return (repo.getName(),
            repo.getOwner(),
            _login_or_blank(issue.assignee),
            _single_line(issue.body),
            # Stringified like every other timestamp written by this module,
            # so the row holds only text whichever writer is in use.
            _str_or_blank(issue.closed_at),
            _login_or_blank(issue.closed_by),
            str(issue.id),
            str(issue.number),
            _or_blank(issue.title))


def _pull_row(repo, pull):
    """Build the pulls.csv row for one pull request of ``repo``."""
    return (repo.getName(),
            repo.getOwner(),
            str(pull.additions),
            _login_or_blank(pull.assignee),
            _single_line(pull.body),
            str(pull.changed_files),
            _str_or_blank(pull.closed_at),
            str(pull.comments),
            _or_blank(pull.comments_url),
            _str_or_blank(pull.created_at),
            str(pull.deletions),
            _or_blank(pull.diff_url),
            _or_blank(pull.html_url),
            str(pull.id),
            _or_blank(pull.issue_url),
            _or_blank(pull.merge_commit_sha),
            str(pull.mergeable),
            _or_blank(pull.mergeable_state),
            str(pull.merged),
            _str_or_blank(pull.merged_at),
            str(pull.number),
            _or_blank(pull.patch_url),
            _or_blank(pull.review_comment_url),
            str(pull.review_comments),
            _or_blank(pull.review_comments_url),
            _or_blank(pull.state),
            _single_line(pull.title),
            _str_or_blank(pull.updated_at),
            _login_or_blank(pull.user))


def output_data(repo):
    """Append everything gathered for ``repo`` to the per-entity CSV files.

    Files are opened in binary append mode ('ab') in the current working
    directory, in this order: repos.csv, contributors.csv, commits.csv,
    languages.csv, subscribers.csv, labels.csv, issues.csv, pulls.csv.
    The writer class depends on the module-level ``use_utf8`` flag.

    Args:
        repo: repository wrapper exposing the ``get*()`` accessors used below
            (``getRepoObject``, ``getName``, ``getOwner``, the ``get*Count``
            getters and the collection getters).

    Raises:
        AssertionError: when a counter or an issue id/number has an
            unexpected type.
    """
    with open('repos.csv', 'ab') as output_csvfile:
        scream.ssay('repos.csv opened for append..')
        repowriter = _csv_writer_for(output_csvfile)
        repowriter.writerow(_repo_row(repo))

    with open('contributors.csv', 'ab') as output_csvfile:
        scream.ssay('contributors.csv opened for append..')
        contribwriter = _csv_writer_for(output_csvfile)
        for contributor in repo.getContributors():
            contribwriter.writerow((repo.getName(),
                                    repo.getOwner(),
                                    contributor.login))

    with open('commits.csv', 'ab') as output_csvfile:
        scream.ssay('commits.csv opened for append..')
        commitswriter = _csv_writer_for(output_csvfile)
        for commit in repo.getCommits():
            commitswriter.writerow(_commit_row(repo, commit))

    # The getter must be *called*: the bound method object is never None,
    # which used to make the warning branch unreachable and let a None
    # result blow up in the loop.
    if repo.getLanguages() is not None:
        with open('languages.csv', 'ab') as output_csvfile:
            scream.ssay('languages.csv opened for append..')
            langwriter = _csv_writer_for(output_csvfile)
            for language in repo.getLanguages():
                langwriter.writerow((repo.getName(),
                                     repo.getOwner(),
                                     language))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no languages[]')

    # NOTE(review): subscribers.csv is filled from getContributors(); this
    # looks like it should use a subscribers getter - confirm against
    # the repository wrapper class before changing.
    if repo.getContributors() is not None:
        with open('subscribers.csv', 'ab') as output_csvfile:
            scream.ssay('subscribers.csv opened for append..')
            subscriberswriter = _csv_writer_for(output_csvfile)
            for subscriber in repo.getContributors():
                subscriberswriter.writerow(_subscriber_row(repo, subscriber))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no subscribers[]')

    if repo.getLabels() is not None:
        with open('labels.csv', 'ab') as output_csvfile:
            scream.ssay('labels.csv opened for append..')
            labelswriter = _csv_writer_for(output_csvfile)
            for label in repo.getLabels():
                labelswriter.writerow((repo.getName(),
                                       repo.getOwner(),
                                       label.name,
                                       label.color))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no labels[]')

    if repo.getIssues() is not None:
        with open('issues.csv', 'ab') as output_csvfile:
            scream.ssay('issues.csv opened for append..')
            issueswriter = _csv_writer_for(output_csvfile)
            for issue in repo.getIssues():
                issueswriter.writerow(_issue_row(repo, issue))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no issues[]')

    if repo.getPulls() is not None:
        with open('pulls.csv', 'ab') as output_csvfile:
            scream.ssay('pulls.csv opened for append..')
            pullswriter = _csv_writer_for(output_csvfile)
            for pull in repo.getPulls():
                pullswriter.writerow(_pull_row(repo, pull))
    else:
        scream.log_warning('Repo ' + repo.getName() + ' has no pulls[]')
                resume_on_repo_owner = resume_on_repo.split(',')[1]

                if not ((resume_on_repo_name == repo.getName()) and
                        (resume_on_repo_owner == repo.getOwner())):
                    iteration_step_count += 1
                    continue

            try:
                repository_object = repos[key]
                repository = gh.get_repo(key)
                print 'getting contributors'
                contributors = repository.get_contributors()
                repo.setRepoObject(repository)
            except UnknownObjectException as e:
                scream.log_warning('Repo with key + ' + key +
                                    ' not found, error({0}): {1}'.
                                    format(e.status, e.data))
                repos_reported_nonexist.append(key)
                continue

            try:
                languages = repository.get_languages()
                print str(languages)
                print repository.language
                print repository.languages_url
            except GithubException as gite:
                scream.log_warning('GithubException while gettings langs in + ' + key +
                                    ' , error({0}): {1}'.
                                    format(e.status, e.data))
                continue
Ejemplo n.º 21
0
def developer_revealed(thread_getter_instance, repository, repo, contributor):
    # Gather profile fields and per-repository statistics for one contributor
    # and write them out through the two module-level CSV writers.
    #
    # Parameters (as used in the visible code):
    #   thread_getter_instance - only stringified into the final debug message
    #   repository             - object whose .name is logged
    #   repo                   - project wrapper whose get*() accessors supply
    #                            the repository columns of the summary row
    #   contributor            - user object; its attributes and get_repos()
    #                            supply the developer columns
    #
    # Side effects: one result_punch_card_writer row per stats entry of every
    # analysed sub-repository, and one result_writer summary row at the end.
    # No return statement is present in the visible code.
    global result_writer
    global result_punch_card_writer

    # The punch-card writer must have been set up before this is called.
    assert result_punch_card_writer is not None

    developer_login = contributor.login
    scream.log_debug('Assigning a contributor: ' + str(developer_login) + ' to a repo: ' + str(repository.name), True)
    developer_name = contributor.name
    # 1. Number of people this developer follows [FollowEvent]
    # NOTE(review): this comment describes "following" but the value read is
    # contributor.followers (and the reverse below) - names look swapped; confirm.
    developer_followers = contributor.followers
    # 2. Number of people who follow this developer [FollowEvent]
    developer_following = contributor.following

    developer_location = contributor.location
    developer_total_private_repos = contributor.total_private_repos
    developer_total_public_repos = contributor.public_repos

    # 5a. Number of repos in which the developer is a team member [TeamAddEvent] [MemberEvent]
    developer_collaborators = contributor.collaborators
    scream.say('Developer collaborators count is: ' + str(developer_collaborators))
    # 6a. Number of repos in which the developer is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
    developer_contributions = contributor.contributions
    scream.say('Developer contributions count is: ' + str(developer_contributions))

    # his_repositories - the projects created by the developer / of which he is the owner
    his_repositories = contributor.get_repos()

    # 17. Does the developer commit during working hours (depending on the time zone)?
    # NOTE(review): the next line is truncated in this copy - runs of '*' replace
    # source text. The code that opens the enclosing loops/try blocks and
    # initialises his_contributors, self_contributing, self_collaborating,
    # total_his_collaborators, total_his_issues, total_his_pull_requests,
    # developer_works_during_bd, developer_works_period and trying_to_get_stats
    # is not visible here.
    scream.log_debug("Starting to analyze OSRC card for user: "******"Histogram for hours for user: "******"pod repozytorium"
                        # Retry loop for the contributor statistics of one sub-repository:
                        # it only ends through a `break` or a re-raised exception.
                        while True:
                            try:
                                trying_to_get_stats += 1
                                stats = his_repo.get_stats_contributors()
                                # Separate status probe of the same stats URL; 204 is treated
                                # below as "repository is empty, nothing to iterate".
                                status_code__ = get_status_code('https://api.github.com/repos/' + his_repo.full_name + '/stats/contributors') 
                                if status_code__ != 204:
                                    for s in stats:
                                        # Totals over all weeks for this author; presumably
                                        # c/a/d are commits/additions/deletions - TODO confirm.
                                        ad___c = 0
                                        ad___a = 0
                                        ad___d = 0
                                        for w in s.weeks:
                                            ad___c += w.c
                                            ad___a += w.a
                                            ad___d += w.d
                                        # his_contributors presumably is a set (uses .add and len());
                                        # if so the membership test is redundant - confirm.
                                        if s.author.login not in his_contributors:
                                            his_contributors.add(s.author.login)
                                        # Row: repo owner, repo name, analysed developer, stats author,
                                        # s.total, then the three weekly sums.
                                        result_punch_card_writer.writerow([str(his_repo.owner.login), str(his_repo.name),
                                                                          str(developer_login), str(s.author.login), str(s.total), str(ad___c), str(ad___a), str(ad___d)])
                                else:
                                    scream.log_debug('The subrepo is empty, thus no statistics (punchcard) generated this time', True)
                                break
                            except GithubException as e:
                                # freeze() is defined elsewhere; presumably it logs and waits
                                # before the next attempt - TODO confirm.
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Unexpected error with getting stats.')
                                # A blocked repository is given up on instead of retried.
                                if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                                    scream.log_debug("It is a private repo.. Skip!", True)
                                    break
                                # force_raise is not defined in this block (presumably a module
                                # flag); when falsy the while-loop simply tries again.
                                if force_raise:
                                    raise
                            except TypeError as e:
                                scream.log_warning('This was stats attempt no: ' + str(trying_to_get_stats), True)
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise
                            except Exception as e:
                                freeze(str(e) + ' his_repo.get_stats_contributors(). Punch-card not ready?')
                                # probably punch card not ready
                                if force_raise:
                                    raise

                        # 6. Number of repos the developer did not create in which he is a developer
                        # (counted here: sub-repositories where his own login is among the contributors)
                        if developer_login in his_contributors:
                            self_contributing += 1

                        # 5. Number of repos the developer did not create in which he is a team member
                        # (counted here: all collaborators seen, and how often the login is his own)
                        subrepo_collaborators = his_repo.get_collaborators()
                        for collaborator in subrepo_collaborators:
                            total_his_collaborators += 1
                            if developer_login == collaborator.login:
                                self_collaborating += 1

                        # All elements paginated through his_repositories, thus we can safely break loop
                        break
                    except GithubException as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories')
                        if ("message" in e.data) and (e.data["message"].strip() == "Repository access blocked"):
                            scream.log_debug("It is a private repo.. Skip!")
                            # The enclosing loop header is not visible in this copy.
                            continue
                        if force_raise:
                            raise
                    except TypeError as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise
                    except Exception as e:
                        freeze('While getting subrepo details, ' + str(e) + ' in element his_repo in his_repositories. Quota depleted?')
                        # probably punch card not ready
                        if force_raise:
                            raise

            total_his_contributors = len(his_contributors)

            # 5.  Number of repos the developer did not create in which he is a team member [TeamAddEvent] [MemberEvent]
            # it is enough to subtract from developer_collaborators the occurrences of the login in subrepo.get_collaborators()
            # (a missing collaborators value is treated as 0)
            developer_foreign_collaborators = (developer_collaborators if developer_collaborators is not None else 0) - self_collaborating
            # 6.  Number of repos the developer did not create in which he is a contributor [PushEvent] [IssuesEvent] [PullRequestEvent] [GollumEvent]
            # it is enough to subtract from developer_contributions the occurrences of the login in subrepo.get_contributions()
            # NOTE(review): unlike the line above there is no None guard on
            # developer_contributions - confirm it can never be None here.
            developer_foreign_contributions = developer_contributions - self_contributing

            # All properties checked for a dev, thus we can safely break loop
            break

        except Exception as e:
            freeze('Error ' + str(e) + ' in for his_repo in his_repositories loop. Will start the subrepo analysis from the beginning.')
            # Fetch the repository listing again before the next attempt.
            his_repositories = contributor.get_repos()
            if force_raise:
                raise

    # Developer company (if any given)
    company = contributor.company
    created_at = contributor.created_at
    # Does the developer want to be hired?
    hireable = contributor.hireable
    disk_usage = contributor.disk_usage

    public_gists = contributor.public_gists
    owned_private_repos = contributor.owned_private_repos
    # Same attribute as developer_total_private_repos above; both end up in the row.
    total_private_repos = contributor.total_private_repos

    scream.log_debug('Thread ' + str(thread_getter_instance) +
                     ' Finished revealing contributor: ' + str(developer_login) + ' in a repo: ' + str(repository.name), True)

    # show_trace is not defined in this block (presumably a module-level flag).
    # NOTE(review): print_exc() is called outside any except handler here, so
    # there may be no active exception for it to print - confirm intent.
    if show_trace:
        scream.log_debug('Printing traceback stack', True)
        traceback.print_stack()
        scream.log_debug('Printing traceback exc pathway', True)
        traceback.print_exc()

    # Summary row. Both branches emit the same columns in the same order:
    # repository fields, aggregated sub-repository counters, developer fields.
    # Without use_utf8 every value goes through str(), so a None repository
    # field (e.g. description) is written as the text 'None'.
    if not use_utf8:
        result_writer.writerow([str(repo.getUrl()), str(repo.getName()), str(repo.getOwner()),
                               str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), str(repo.getDefaultBranch()), str(repo.getDescription()),
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               str(repo.getLanguage()), str(repo.getMasterBranch()), str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               str(repo.getOrganization()), str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               str(developer_login),
                               str(developer_name if developer_name is not None else ''), str(developer_followers), str(developer_following),
                               str(developer_collaborators), str(company if company is not None else ''), str(developer_contributions),
                               str(created_at), str(hireable if hireable is not None else ''),
                               str(developer_location if developer_location is not None else ''),
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    # With use_utf8 the free-text fields are handed to the writer unchanged (no
    # str() call) and None is replaced by ''; presumably result_writer is a
    # unicode-aware writer in that mode - TODO confirm.
    else:
        result_writer.writerow([repo.getUrl(), repo.getName(), repo.getOwner(), str(repo.getStargazersCount()), str(repo.getWatchersCount()),

                               str(repo.getCreatedAt()), repo.getDefaultBranch(), repo.getDescription() if repo.getDescription() is not None else '',
                               str(repo.getIsFork()), str(repo.getForks()), str(repo.getForksCount()),
                               str(repo.getHasDownloads()), str(repo.getHasWiki()), str(repo.getHasIssues()),
                               repo.getLanguage() if repo.getLanguage() is not None else '',
                               repo.getMasterBranch() if repo.getMasterBranch() is not None else '',
                               str(repo.getNetworkCount()), str(repo.getOpenedIssues()),
                               repo.getOrganization() if repo.getOrganization() is not None else '',
                               str(repo.getPushedAt()), str(repo.getUpdatedAt()), str(repo.getPullsCount()),

                               str(total_his_contributors), str(total_his_collaborators), str(developer_foreign_collaborators),
                               str(developer_foreign_contributions), str(total_his_issues), str(total_his_pull_requests),

                               developer_login,
                               developer_name if developer_name is not None else '', str(developer_followers), str(developer_following),
                               str(developer_collaborators), company if company is not None else '', str(developer_contributions),
                               str(created_at), str(hireable) if hireable is not None else '',
                               developer_location if developer_location is not None else '',
                               str(developer_total_private_repos), str(developer_total_public_repos),
                               str(developer_works_during_bd), str(developer_works_period), str(disk_usage),
                               str(public_gists), str(owned_private_repos), str(total_private_repos)])

    scream.log_debug('Wrote row to CSV.', True)