Example #1
def sleep(github):
    # Pause when either the core or the search API rate limit is exhausted.
    github_limits = github.get_rate_limit()
    if github_limits.core.remaining == 0:
        Common_Utilities.go_to_sleep(
            "API hour limit exceeded, going to sleep for ", 3600)

    if github_limits.search.remaining == 0:
        Common_Utilities.go_to_sleep(
            "API minute limit exceeded, going to sleep for ", 61)
Example #2
def get_top_repos():

    print("Retrieving list of top repos... \n")

    config_dict = Common_Utilities.read_config_file()  # read all ini data

    # find top repos that have been pushed to in the last X days
    time_span = int(config_dict["TIME_SPAN"])
    push_date = date.today() - datetime.timedelta(days=time_span)

    quick_sleep = int(
        config_dict["QUICK_SLEEP"])  # regular sleep after each iteration
    error_sleep = int(
        config_dict["ERROR_SLEEP"]
    )  # sleep after a serious issue is detected from GitHub; should be around 10 min, i.e., 600 sec
    max_size = int(
        config_dict["MAXSIZE"]
    )  # maximum number of repos to look for (top X); configured in config file

    github = Github(config_dict["TOKEN"])  # pass the connection token

    output_file_name = "scripts/Popularity/Top_Repo.txt"  # this is the output file that we are going to send repo names to

    output_file = open(output_file_name, "w")
    output_file.close()

    query = "pushed:>" + str(
        push_date) + " " + config_dict["REPO_SEARCH_QUERY"]
    print(query)

    query_repo(output_file_name, query, github, quick_sleep, error_sleep,
               max_size)

    print("\nFinally ..... Execution is over \n")
Example #3
def getLastDiscussedDates():

    config_dict = Common_Utilities.read_config_file()  # read all config data
    user_api_key = config_dict["SO_TOKEN"]

    so = stackexchange.Site(stackexchange.StackOverflow,
                            app_key=user_api_key,
                            impose_throttling=True)
    so.be_inclusive()

    data = loadLastDiscussedSOData()

    libraries = Library.objects.all()

    for library in libraries:
        tag = library.so_tag
        questions = so.questions(sort='creation',
                                 order='DESC',
                                 tagged=[tag, 'java'])

        dates_string = ""
        for i in range(10):
            if questions is None or i >= len(questions):
                break
            if i > 0:
                dates_string += ';'
            dates_string += questions[i].creation_date.strftime(
                "%m/%d/%Y, %H:%M:%S") + " UTC"

        if not dates_string:
            data[tag] = None
        else:
            data[tag] = dates_string
        saveData(data)  # persist after each library so partial results are kept
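loadLastDiscussedSOData and saveData are not shown. Since run() later copies scripts/LastDiscussedOnStackOverflow/*.pkl to the output directory, a pickle-backed sketch is plausible (the file path and empty-dict default are assumptions):

import os
import pickle

DATA_FILE = "scripts/LastDiscussedOnStackOverflow/last_discussed.pkl"  # assumed path

def loadLastDiscussedSOData():
    # Hypothetical sketch: return the saved tag -> dates mapping, or an empty dict.
    if not os.path.exists(DATA_FILE):
        return {}
    with open(DATA_FILE, "rb") as f:
        return pickle.load(f)

def saveData(data):
    # Hypothetical sketch: overwrite the pickle on every save.
    with open(DATA_FILE, "wb") as f:
        pickle.dump(data, f)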
Example #4
def get_issues():

    performance_classifier = PerformanceClassifier()
    security_classifier = SecurityClassifier()
    config_dict = Common_Utilities.read_config_file()  # read all config data

    lib_data_json = read_json_file(config_dict["LIBRARY_LIST"])

    print("Getting JIRA issue data")
    getIssueDataJIRA(lib_data_json, performance_classifier,
                     security_classifier)
    print("Getting GitHub issue data")
    getIssueData(config_dict["TOKEN"], performance_classifier,
                 security_classifier)
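read_json_file is another helper that is not shown; the name and call sites suggest nothing more than a JSON load. A minimal sketch (the body is an assumption):

import json

def read_json_file(path):
    # Hypothetical sketch: the library list is a JSON array of objects.
    with open(path) as f:
        return json.load(f)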
Example #5
def addlibraries():
    config_dict = Common_Utilities.read_config_file()

    libraries = read_json_file(config_dict["LIBRARY_LIST"])

    for entry in libraries:
        domain_name = entry['Domain']
        library_name = entry['LibraryName']

        domain, _ = Domain.objects.get_or_create(name=domain_name)

        # create a new library if it doesn't exist; otherwise update the existing entry
        if Library.objects.filter(name=library_name).exists():
            library = Library.objects.get(name=library_name)
        else:
            library = Library()

        library.name = library_name
        library.so_tag = entry['SOtags']
        library.domain = domain
        library.package = entry['Package']
        library.github_repo = entry['FullRepoName']
        library.github_url = entry['GitHubURL']
        library.jira_url = entry['JIRAURL']
        library.maven_url = entry['MavenURL']
        library.save()

    # remove any libraries in the DB that are not in the config file (i.e., keep the file and the DB lib list in sync)
    libraries_to_consider = [entry['LibraryName'] for entry in libraries]

    for library in Library.objects.all():
        if library.name not in libraries_to_consider:
            print("Library", library.name,
                  "no longer exists in config file. Removing its data")
            library.delete()
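For reference, each entry in the LIBRARY_LIST JSON file must carry at least the keys accessed above. A made-up example entry (the keys come from the code; every value is a placeholder):

example_entry = {
    "Domain": "JSON processing",
    "LibraryName": "gson",
    "SOtags": "gson",
    "Package": "com.google.gson",
    "FullRepoName": "google/gson",
    "GitHubURL": "https://github.com/google/gson",
    "JIRAURL": "",
    "MavenURL": "https://mvnrepository.com/artifact/com.google.code.gson/gson",
}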
Example #6
def get_popularity():
    print("Getting popularity")
    config_dict = Common_Utilities.read_config_file()  # read all config data

    library_dict = read_libraries(
        config_dict["LIBRARY_LIST"])  # read all libraries to search against

    output_file_name = config_dict[
        "POPULARITY_OUTPUT_FILE"]  # output file for each library's total dependent count

    # create (or truncate) the output file
    output_file = open(output_file_name, "w")
    output_file.close()

    for repo in library_dict.values():
        print("Getting dependents for", repo)
        num_dependents = get_num_dependents(repo)
        send_totals_to_file(output_file_name, repo, num_dependents)
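send_totals_to_file is not shown; a minimal append-based sketch (the line format is an assumption):

def send_totals_to_file(output_file_name, repo, num_dependents):
    # Hypothetical sketch: append one "repo count" line per library.
    with open(output_file_name, "a") as f:
        f.write("%s %d\n" % (repo, num_dependents))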
Example #7
def getLicenses():
    config_dict = Common_Utilities.read_config_file()  # read all ini data
    token = config_dict["TOKEN"]
    data = loadLicenseData()

    github = Github(token)
    libraries = Library.objects.all()

    for library in libraries:
        print("Getting license for ", library.name)
        try:
            repo = github.get_repo(library.github_repo)
            data[library.github_repo] = repo.get_license().license.name
            saveData(data)
        except UnknownObjectException:
            print("ERROR: could not get license for lib", library.name)
            traceback.print_exc()
            data[library.github_repo] = 'None'
            saveData(data)
Example #8
def save_chart_in_db(pygal_chart, domain, metric_name):
    # save chart in DB

    # Metric.objects.get() raises DoesNotExist rather than returning None,
    # so use filter().first() to make the guard below effective
    metric = Metric.objects.filter(name=metric_name).first()

    if not metric:
        print("No metric object found for metric:", metric_name)
        return

    chart = Chart.objects.filter(domain=domain).filter(metric=metric)

    # create a new chart if it doesn't exist; otherwise update the existing entry
    if not chart.exists():
        chart = Chart()
        chart.domain = domain
        chart.metric = metric
    else:
        chart = chart.get()

    chart.chart = Common_Utilities.chart_to_blob(pygal_chart)
    chart.save()
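Common_Utilities.chart_to_blob is not shown. Since pygal charts render to SVG bytes, a plausible sketch is simply (the body is an assumption):

def chart_to_blob(pygal_chart):
    # Hypothetical sketch: pygal's render() returns the chart as SVG bytes,
    # suitable for storage in a binary DB column.
    return pygal_chart.render()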
Example #9
def run():
    addlibraries()
    get_popularity()
    get_release_freq()
    getLicenses()
    getLastModificationDates()
    getLastDiscussedDates()
    get_issues()
    get_breaking_changes()  # must be called after releases are fetched, i.e., after the release frequency metric

    config_dict = Common_Utilities.read_config_file()
    output_path = config_dict["OUTPUT_PATH"]

    shutil.copy2('scripts/Popularity/popularity_results.txt', output_path)

    for file in glob.glob(r'scripts/License/*.pkl'):
        shutil.copy2(file, output_path)

    for file in glob.glob(r'scripts/LastModificationDate/*.pkl'):
        shutil.copy2(file, output_path)

    for file in glob.glob(r'scripts/LastDiscussedOnStackOverflow/*.pkl'):
        shutil.copy2(file, output_path)

    filldb()

    try:
        os.mkdir(output_path + "charts")
    except FileExistsError:
        print("Charts directory already exists")

    for file in glob.glob(output_path + r'*_chart.pkl'):
        shutil.copy2(file, output_path + "charts")

    for file in glob.glob(output_path + r'*.pkl'):
        os.remove(file)

    os.remove(output_path + "popularity_results.txt")
Example #10
def getLastModificationDates():
    config_dict = Common_Utilities.read_config_file()  # read all config data
    token = config_dict["TOKEN"]

    data = loadLastModificationDateData()

    github = Github(token)
    libraries = Library.objects.all()

    for library in libraries:
        repo = github.get_repo(library.github_repo)
        # collect the dates of the library's 10 most recent commits
        dates_string = ""
        i = 0
        for c in repo.get_commits():
            if i == 10:
                break
            if i > 0:
                dates_string += ';'
            dates_string += c.commit.author.date.strftime(
                "%m/%d/%Y, %H:%M:%S") + " UTC"
            i += 1
        data[library.github_repo] = dates_string
        saveData(data)
Example #11
def get_release_freq():
    config_dict = Common_Utilities.read_config_file()  # read all config data

    getReleaseDates(config_dict["TOKEN"])
Example #12
def query_repo(output_file_name, base_query, github, quick_sleep, error_sleep,
               max_size):

    repo_set = set()
    try:  # check GitHub for the rate limit
        rate_limit = github.get_rate_limit()
        rate = rate_limit.search
        print("The rate limit is %d" % rate.limit)

        if rate.remaining == 0:
            print('You have 0/%d API calls remaining. Reset time: %s' %
                  (rate.limit, rate.reset))
            Common_Utilities.go_to_sleep(
                "Reached API limit per minute, going to sleep for ",
                quick_sleep)
        else:
            print('You have %d/%d API calls remaining' %
                  (rate.remaining, rate.limit))

        print('Base query: %s' % base_query)
        curr_query = base_query + " stars:>100"
        stars = 100  # lower bound of the star window; avoids a NameError if the first search returns nothing

        while len(repo_set) < max_size:
            print("Collected ", len(repo_set), " repos so far")
            print(curr_query)
            result = github.search_repositories(curr_query,
                                                sort='stars',
                                                order='desc')
            cnt = 0
            pgno = 1

            # 300 is how many repos the script reads per query (kept at 300 because
            # reading more than that may crash the Github object)
            while cnt <= 300:
                try:
                    for repo in result.get_page(pgno):
                        repo_set.add(repo.full_name)
                        cnt += 1

                        stars = repo.stargazers_count

                        if len(repo_set) == max_size:
                            break
                except Exception:
                    Common_Utilities.go_to_sleep(
                        "API limit exceeded, going to sleep for ", quick_sleep)
                    continue

                if len(repo_set) == max_size:
                    break

                pgno += 1

            # narrow the star window: the next query fetches repos with at most
            # as many stars as the last one seen
            curr_query = base_query + " stars:100.." + str(stars)

        output_to_file(output_file_name, repo_set)

    # error handling, just in case (e.g., GitHub's abuse detection kicks in)
    except Exception:
        output_to_file(output_file_name, repo_set)
        print("Error: abuse detection (or similar) triggered; outputting what we have...")
        traceback.print_exc()
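output_to_file is not shown; a minimal sketch that writes one full repo name per line (the exact format is an assumption):

def output_to_file(output_file_name, repo_set):
    # Hypothetical sketch: dump the collected repo names, one per line.
    with open(output_file_name, "w") as f:
        for full_name in sorted(repo_set):
            f.write(full_name + "\n")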