Exemple #1
0
def find_named_ecosystems(s, cutoff=5, g=None):
    """Return named ecosystems and their sizes, split by '.' and '-' separators.

    Every package whose in-degree in ``g`` is at least ``cutoff`` contributes
    a candidate prefix: the part of its name that precedes the first
    occurrence of the separator ('.' or '-') found after a run of word
    characters. Each distinct prefix is then counted against all package
    names, once per separator.

    Args:
        s: Session-like object; only ``s.query(...)`` is used here.
        cutoff: Minimum in-degree a package needs for its name to be
            considered as a source of an ecosystem prefix.
        g: Optional dependency graph. When ``None`` a ``nx.DiGraph`` is built
            from ``get_pkg_edgelist(s)``.

    Returns:
        Dict with the keys ``'dot-ecosystems'`` and ``'dash-ecosystems'``.
        Each value is a list of ``[prefix, count]`` pairs ordered by count,
        largest first, restricted to counts greater than one.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = nx.DiGraph(get_pkg_edgelist(s))

    # dict() accepts both the plain dict returned by NetworkX 1.x and the
    # degree view returned by NetworkX 2.x and later.
    popular_ids = [
        node for node, indeg in dict(g.in_degree()).items() if indeg >= cutoff
    ]

    prefix_pattern = re.compile(r'\w+([.-])')
    search_names = []
    seen = set()  # O(1) duplicate check; search_names keeps discovery order
    for pkg_id in popular_ids:
        row = s.query(
            db.Package.name).filter(db.Package.id == pkg_id).first()
        if row is None:
            # Graph node without a matching Package row: nothing to inspect.
            continue
        pkg_name = row[0]
        match = prefix_pattern.search(pkg_name)
        if match is None:
            continue
        prefix = pkg_name.split(match.group(1))[0]
        if prefix not in seen:
            seen.add(prefix)
            search_names.append(prefix)

    def name_searcher(sep_char, prefixes):
        """Count package names starting with each prefix plus ``sep_char``."""
        counts = [
            [
                prefix,
                s.query(db.Package.name).filter(
                    db.Package.name.startswith(prefix + sep_char)).count(),
            ]
            for prefix in prefixes
        ]
        counts.sort(key=lambda pair: pair[1], reverse=True)
        # A single match is just one package, not an ecosystem.
        return [pair for pair in counts if pair[1] > 1]

    # dot/dash search
    return {
        'dot-ecosystems': name_searcher('.', search_names),
        'dash-ecosystems': name_searcher('-', search_names)
    }
def packages_with_selfloops(s, g=None):
    """Return the name rows of packages which require themselves.

    Args:
        s: Session-like object; only ``s.query(...)`` is used here.
        g: Optional dependency graph. When ``None`` it is built with
            ``create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))``.

    Returns:
        List with one entry per node that has an edge to itself. Each entry
        is whatever ``.first()`` yields for a ``db.Package.name`` query on
        that node id, so it is a row rather than a bare string, and ``None``
        when no package has that id.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))
    # has_edge(n, n) identifies self-loops on every NetworkX version, whereas
    # the Graph.nodes_with_selfloops() method only exists on older releases.
    return [
        s.query(db.Package.name).filter(db.Package.id == node).first()
        for node in g.nodes()
        if g.has_edge(node, node)
    ]
Exemple #3
0
def top_required_packages(s, top=None, g=None):
    """Return the most required packages and how often each is required.

    Args:
        s: Session-like object; only ``s.query(...)`` is used here.
        top: Maximum number of entries to return. ``None`` returns an entry
            for every node in the graph.
        g: Optional dependency graph. When ``None`` a ``nx.DiGraph`` is built
            from ``get_pkg_edgelist(s)``.

    Returns:
        List of ``[package, in_degree]`` pairs ordered by in-degree, largest
        first. ``package`` is the result of ``.first()`` on a ``db.Package``
        query for the node id, so it is ``None`` when no such row exists.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = nx.DiGraph(get_pkg_edgelist(s))
    # dict() accepts both the plain dict returned by NetworkX 1.x and the
    # degree view returned by NetworkX 2.x and later.
    ranked = sorted(
        dict(g.in_degree()).items(), key=lambda pair: pair[1], reverse=True)
    return [
        [s.query(db.Package).filter(db.Package.id == pkg_id).first(), indeg]
        for pkg_id, indeg in ranked[:top]
    ]
def package_out_degree_distribution_chart(s, filename, g=None):
    """Create an out-degree distribution chart and save it to ``filename``.

    Args:
        s: Session-like object, used only to build the graph when ``g`` is
            not supplied.
        filename: Destination handed to ``plt.savefig``.
        g: Optional dependency graph. When ``None`` it is built with
            ``create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))``.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))
    # dict() accepts both the plain dict returned by NetworkX 1.x and the
    # degree view returned by NetworkX 2.x and later.
    deg_seq = sorted(dict(g.out_degree()).values(), reverse=True)
    # NOTE(review): newer Matplotlib releases replaced `normed` with
    # `density`; switch the keyword once older Matplotlib no longer needs to
    # be supported.
    plt.hist(deg_seq, bins=range(0, 20, 1), normed=True)
    plt.xticks(range(0, 20, 1))
    plt.title('Requirement graph outdegree distribution chart')
    plt.xlabel('Outdegree')
    plt.ylabel('Frequency')
    plt.savefig(filename)
    plt.close()
def _component_package_names(s, components):
    """Return name rows, grouped per component, for components with more than one node."""
    named = []
    for component in components:
        # A single-node component carries no connection information.
        if len(component) > 1:
            named.append([
                s.query(db.Package.name).filter(
                    db.Package.id == pkg_id).first()
                for pkg_id in component
            ])
    return named


def strong_weak_package_connections(s, g=None):
    """Return package names grouped by strongly and weakly connected component.

    Args:
        s: Session-like object; only ``s.query(...)`` is used here.
        g: Optional dependency graph. When ``None`` it is built with
            ``create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))``.

    Returns:
        Dict with the keys ``'strong'`` and ``'weak'``. Each value is a list
        with one inner list per component of more than one node; the inner
        list holds the result of ``.first()`` on a ``db.Package.name`` query
        for every node id in that component.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = create_graph(get_pkg_nodelist(s), get_pkg_edgelist(s))
    strong_names = _component_package_names(
        s, nx.strongly_connected_components(g))
    weak_names = _component_package_names(
        s, nx.weakly_connected_components(g))
    return {'strong': strong_names, 'weak': weak_names}
Exemple #6
0
def downloads_vs_indegree(s, filename, g=None):
    """Chart downloads per package against the number of times it is required.

    The chart is drawn on log-log axes and saved to ``filename``.

    Args:
        s: Session-like object; only ``s.query(...)`` is used here.
        filename: Destination handed to ``plt.savefig``.
        g: Optional dependency graph. When ``None`` a ``nx.DiGraph`` is built
            from ``get_pkg_edgelist(s)``.

    Returns:
        List of ``[in_degree, downloads]`` pairs, one per graph node.
        ``downloads`` is the summed ``db.Release.downloads`` over the
        package's releases flagged as current; it can be ``None`` when the
        sum has no rows to aggregate.
    """
    # Compare against None explicitly: an empty graph is falsy, and a caller
    # that passed one should not silently get a graph rebuilt from the database.
    if g is None:
        g = nx.DiGraph(get_pkg_edgelist(s))
    plot_data = [
        [
            g.in_degree(n),
            # `== True` is intentional: the comparison builds a SQL
            # expression, it is not a Python truth test.
            s.query(func.sum(db.Release.downloads)).filter(
                db.Release.current == True).filter(  # noqa: E712
                    db.Release.package_id == n).first()[0],
        ]
        for n in g.nodes()
    ]
    # zip(*[]) yields nothing, so unpacking it would raise ValueError for a
    # graph without nodes; fall back to two empty series instead.
    y, x = zip(*plot_data) if plot_data else ((), ())
    plt.loglog(x, y, marker=',', linestyle='None')
    plt.title('Downloads vs. # times required')
    plt.ylabel('# times required')
    plt.xlabel('Downloads')
    plt.ylim([0, 1000])
    plt.grid(True)
    plt.savefig(filename)
    plt.close()
    return plot_data