Python Graph Examples, pattern.graph.Graph Python Examples

Example #1

0

Show file

File: pattern_data.py Project: shohrukh92/osint_tools_security_auditing

def get_pattern_data(search_param):
   """Build and export a graph of noun phrases joined by `search_param` in tweets.

   First prints the ASCII-only plain text of the cached search results.
   Then scans ten result pages for the pattern
   ``{NP} (VP) <search_param> {NP}`` and adds an edge from the left noun
   phrase to the right noun phrase for every match.  The graph is exported
   to ``data``.
   """
   twitter = Twitter(language='en')

   for tweet in twitter.search(search_param, cached=True):
      print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

   # The search pattern only depends on search_param, so build it once.
   p = '{NP} (VP) ' + search_param + ' {NP}'

   g = Graph()
   for i in range(10):
      for result in twitter.search(search_param, start=i+1, count=50):
         s = result.text.lower()
         s = plaintext(s)
         s = parsetree(s)
         for m in search(p, s):
            x = m.group(1).string # NP left
            y = m.group(2).string # NP right
            # Register each endpoint independently and always add the edge.
            # (Previously y and the edge were only handled when x was new,
            # so repeated left-hand phrases never gained further edges.)
            if x not in g:
               g.add_node(x)
            if y not in g:
               g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0,0,0,0.75)) # R,G,B,A

   #if len(g)>0:
   #   g = g.split()[0] # Largest subgraph.

   for n in g.sorted()[:40]: # Sort by Node.weight.
      n.fill = (0, 0.5, 1, 0.75 * n.weight)

   g.export('data', directed=False, weighted=0.6)

Example #2

0

Show file

File: ConceptGraph.py Project: Darth-Neo/nl_lib

 def subGraph(self):
     """ Returns the list of subgraphs of a reduced copy of the largest subgraph.

         The (up to) 30 first nodes of the largest subgraph's sorted() order
         are copied into a new Graph together with their direct neighbours
         and connecting edges; that new graph is then split again.
     """
     largest = self.g.split()[0]

     trimmed = Graph()
     for rank, node in enumerate(largest.sorted()[:30], 1):
         node.fill = (0, 0.5, 1, 0.75 * node.weight)
         logger.debug(u"i:%d=%s" % (rank, node))
         trimmed.add_node(node.id)
         logger.debug(u"edges : %s" % node.edges)

         for edge in node.edges:
             logger.debug(u"edge1 : %s, edge2 : %s" % (edge.node1.id, edge.node2.id))
             # Whichever end of the edge is not this node is the neighbour.
             neighbour_id = edge.node2.id if edge.node1.id == node.id else edge.node1.id
             trimmed.add_node(neighbour_id)
             trimmed.add_edge(edge.node1.id, edge.node2.id, stroke=(0, 0, 0, 0.75))

     return trimmed.split()

Example #3

0

Show file

File: dependency.py Project: vzhong/kbp2015

 def make_graph(cls, parse, enhanced=True):
     """ Builds a Graph from an iterable of (child, parent, arc) triples.

         When `enhanced` is false, only the part of the arc label before the
         first ':' is kept. Edges run parent -> child and are skipped when
         both ends resolve to the same node id.

         Returns (graph, edge_map, node_map, root). edge_map is returned
         empty; root is the raw child value of the last 'root' arc, or None.
     """
     graph = Graph()
     edge_map = {}
     node_map = {}
     root = None

     def node_for(key):
         # Create the Node wrapper on first sight, reuse it afterwards.
         if key not in node_map:
             node_map[key] = Node(key)
         return node_map[key]

     for child_key, parent_key, label in parse:
         if label == 'root':
             root = child_key
         if not enhanced:
             label = label.split(':')[0]
         child_node = node_for(child_key)
         parent_node = node_for(parent_key)
         if parent_node.id != child_node.id:
             graph.add_edge(parent_node, child_node, type=label)

     return graph, edge_map, node_map, root

Example #4

0

Show file

File: ConceptGraph.py Project: Darth-Neo/nl_lib

    def __init__(self, homeDir=None):
        """Create the ``html`` output directory and an empty Graph.

        homeDir -- parent directory of the ``html`` folder; the current
                   working directory is used when it is None.
        """
        super(self.__class__, self).__init__()

        parent = os.getcwd() if homeDir is None else homeDir
        self.homeDir = parent + os.sep + u"html"

        # Make sure the export target exists before anything is written.
        if not os.path.exists(self.homeDir):
            os.makedirs(self.homeDir)

        self.g = Graph()

Example #5

0

Show file

File: html.py Project: Matrx63/seo

def make_graph(dgram, n, numWord):
    """Export a star-shaped graph of the entries in `dgram` (only when n == 1).

    Every key of `dgram` becomes a node attached to an invisible centre node;
    the node's radius is its value plus one and its centrality is the value
    divided by `numWord`. For any other `n` nothing happens.
    """
    if n != 1:
        return

    graph = Graph(distance=4.0)
    hub = graph.add_node(' ', radius=0)
    hub.fill = (0,0,0,0)
    for key, count in dgram.items():
        share = count / numWord
        node = graph.add_node(key, centrality=share, radius=count + 1)
        node.fill = (0, 0.5, 1, node.radius * 0.1)
        # Larger nodes get shorter (fully transparent) spokes to the hub.
        graph.add_edge(hub, node, length=2000/node.radius, stroke=(0,0,0,0)) # R,G,B,A
    graph.export('/home/matrx63/Web/monogram', pack=False, width='2000', height='2000', frames=5000, ipf=30)

Example #6

0

Show file

File: commonsense.py Project: kracekumar/pattern

 def __init__(self, data="commonsense.csv", **kwargs):
     """ A semantic network of common sense, using different relation types:
         - is-a,
         - is-part-of,
         - is-opposite-of,
         - is-property-of,
         - is-related-to,
         - is-same-as,
         - is-effect-of.
         The data parameter is the path of a CSV-file to load,
         or None to start with an empty graph.
     """
     Graph.__init__(self, **kwargs)
     self._properties = None
     # Load data from the given path,
     # a CSV-file of (concept1, relation, concept2, context, weight)-items.
     if data is not None:
         # Close the file as soon as it is read instead of leaking the handle.
         with open(data) as f:
             s = f.read()
         s = s.strip(BOM_UTF8)
         s = s.decode("utf-8")
         s = ((v.strip("\"") for v in r.split(",")) for r in s.splitlines())
         for concept1, relation, concept2, context, weight in s:
             # The stored weight is a tenth of the CSV value, capped at 1.0.
             self.add_edge(concept1, concept2, 
                 type = relation, 
              context = context, 
               weight = min(int(weight)*0.1, 1.0))

Example #7

0

Show file

File: log_stat_graph.py Project: sangheestyle/analyze_repo

class LogStatGraph:
    """Graph linking commit authors to the files their commits touched."""

    def __init__(self, name=None):
        """Create an empty graph.

        name -- optional graph name; when None, load() takes the name from
                the first log_stat it is given.
        """
        # Keep the caller's name (it used to be discarded unconditionally).
        self.name = name
        self.graph = Graph()

    def load(self, log_stat):
        """Add one author node per commit and one node + edge per changed file."""
        if self.name is None:
            self.name = log_stat.repo_name
        for commit in log_stat.commits:
            author_email = commit.ae
            self.graph.add_node(author_email, fill=BLACK_50)
            for diffstat in commit.diffstats:
                file_path = diffstat["file_path"]
                self.graph.add_node(file_path, stroke=BLACK_25, text=BLACK_15)
                self.graph.add_edge(author_email, file_path, stroke=BLACK_25)

    def prune(self, depth=0):
        """Delegate to Graph.prune() with the given depth."""
        self.graph.prune(depth)

    def export(self, path=None, **kwargs):
        """Export the graph as directed and weighted; path defaults to self.name."""
        if path is None:
            path = self.name
        self.graph.export(path, directed=True, weighted=True, **kwargs)

Example #8

0

Show file

File: example_using_pattern.py Project: yuandra/scraperwiki-scraper-vault

# Example of pattern: http://www.clips.ua.ac.be/pages/pattern

from pattern.web import Bing, plaintext
from pattern.en import Sentence, Chunk, parse
from pattern.search import Pattern
from pattern.graph import Graph, Node, Edge, export

# Build a graph of "X more important than Y" statements found through Bing.
g = Graph()
for i in range(1):
    print "--------------", i
    for r in Bing().search('"more important than"', start=i + 1, count=50):
        # Lower-cased result description with markup stripped by plaintext().
        s = plaintext(r.description.lower())
        print s
        s = Sentence(parse(s))
        print s
        p = Pattern.fromstring('NP (VP) more important than NP')
        for m in p.search(s):
            a = m.constituents(p[+0])[-1]  # Left NP.
            b = m.constituents(p[-1])[+0]  # Right NP.
            # For a Chunk use its head (falling back to the chunk itself
            # when the head is falsy); anything else is used as-is.
            a = (isinstance(a, Chunk) and a.head or a).string
            b = (isinstance(b, Chunk) and b.head or b).string
            if a and b:
                if a not in g:
                    g.add_node(a, radius=5, stroke=(0, 0, 0, 0.8))
                if b not in g:
                    g.add_node(b, radius=5, stroke=(0, 0, 0, 0.8))
                # The edge runs from the right NP (b) to the left NP (a).
                g.add_edge(g[b], g[a], stroke=(0, 0, 0, 0.6))

# NOTE(review): indexing [0] assumes at least one match was added -- confirm.
g = g.split()[0]  # Largest subgraph.

for n in g.sorted()[:40]:  # Sorted by Node.weight.

Example #9

0

Show file

File: 05-trends.py Project: BarcelonaMedia-ViL/pattern

comparisons = []

for i in range(1,10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=i, count=100, cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        try:
            i = s.index("NEW")
            A = s[i-1].strip("?!.:;,#@\"'")
            B = s[i+1].strip("?!.:;,#@\"'")
            # Exclude common phrases such as "this is the new thing".
            if A and B and A not in ("it", "this", "here", "what", "why", "where"):
                comparisons.append((A,B))
        except:
            pass

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A) # "A is the new B": A <= B
    e.weight += 0.1
    print B, "=>", A

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
g = g.split()[0]

export(g, "trends", weight=True, weighted=True, directed=True, overwrite=True)

Example #10

0

Show file

File: 05-trends.py Project: DataBranner/pattern

# Collect (A, B) pairs from tweets of the form "A is the new B".
for i in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=i, count=100, cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        try:
            # Use a separate name so the page counter i is not shadowed.
            pos = s.index("NEW")
        except ValueError:
            # The marker is glued to other characters (e.g. "NEW,"): skip.
            continue
        # A word is needed on both sides of the marker. Without this check
        # s[pos - 1] would wrap around to the last word when pos == 0.
        if pos == 0 or pos + 1 >= len(s):
            continue
        A = s[pos - 1].strip("?!.:;,#@\"'")
        B = s[pos + 1].strip("?!.:;,#@\"'")
        # Exclude common phrases such as "this is the new thing".
        if A and B and A not in ("it", "this", "here", "what", "why", "where"):
            comparisons.append((A, B))

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1
    print(("%s => %s" % (B, A)).encode('utf-8'))

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
g = g.split()[0]

g.export("trends", weighted=True, directed=True)

Example #11

0

Show file

File: 05-trends.py Project: Abhishek-1/temp

                                                start=i,
                                                count=100,
                                                cached=True):
        s = result.text
        s = s.replace("\n", " ")
        s = s.lower()
        s = s.replace("is the new", "NEW")
        s = s.split(" ")
        try:
            i = s.index("NEW")
            A = s[i - 1].strip("?!.:;,#@\"'")
            B = s[i + 1].strip("?!.:;,#@\"'")
            # Exclude common phrases such as "this is the new thing".
            if A and B and A not in ("it", "this", "here", "what", "why",
                                     "where"):
                comparisons.append((A, B))
        except:
            pass

g = Graph()
for A, B in comparisons:
    e = g.add_edge(B, A)  # "A is the new B": A <= B
    e.weight += 0.1
    print(B, "=>", A)

# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the largest subgraph for our visualization.
g = g.split()[0]

g.export("trends", weighted=True, directed=True)

Example #12

0

Show file

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT
from random import choice, random

# This example demonstrates how a graph visualization can be exported to HTML,
# using the HTML5 <canvas> tag and Javascript.
# All properties (e.g., stroke color) of nodes and edges are ported.

g = Graph()
opaque_black = (0, 0, 0, 1)

# Fifty nodes with ids "1" .. "50", all drawn with the same stroke and text color.
for number in range(1, 51):
    g.add_node(id=str(number), radius=5, stroke=opaque_black, text=opaque_black)

# Seventy-five edges; both endpoints are drawn independently at random.
for _ in range(75):
    g.add_edge(choice(g.nodes), choice(g.nodes),
               length=1.0,
               weight=random(),
               stroke=opaque_black)

# Tint the first 20 nodes of the sorted order: higher weight = more opaque blue.
for ranked in g.sorted()[:20]:
    ranked.fill = (0.6, 0.8, 1.0, 0.8 * ranked.weight)

Example #13

0

Show file

File: 02-export.py Project: cloudappsetup/pattern

import os, sys; sys.path.insert(0, os.path.join("..", ".."))

from pattern.graph import Graph
from pattern.graph import export, WEIGHT, CENTRALITY
from random        import choice, random

# This example demonstrates how a graph visualization can be exported to HTML,
# using the HTML5 <canvas> tag and Javascript.
# All properties (e.g. stroke color) of nodes and edges are ported.

g = Graph()

# Shared keyword arguments for every node.
node_style = dict(radius=5, stroke=(0,0,0,1), text=(0,0,0,1))

# Nodes "1" .. "50".
for label in (str(k + 1) for k in range(50)):
    g.add_node(id=label, **node_style)

# 75 edges between randomly picked nodes, each with a random weight.
for _ in range(75):
    source = choice(g.nodes)
    target = choice(g.nodes)
    g.add_edge(source, target, length=1.0, weight=random(), stroke=(0,0,0,1))

# More blue = more important (first 20 nodes of the sorted order).
top_nodes = g.sorted()[:20]
for node in top_nodes:
    node.fill = (0.6, 0.8, 1.0, 0.8 * node.weight)

# This node's label is different from its id.

Example #14

0

Show file

File: 03-template.py Project: Jsa542/pattern

\t<style type="text/css">
\t\t%s
\t</style>
</head>
<body> 
\t%s
</body>
</html>
'''.strip()


def webpage(graph, **kwargs):
    """Return the HTML page for `graph`.

    The graph is serialized twice (CSS and CANVAS, both receiving **kwargs);
    each result is re-indented with tabs and filled into `template`.
    """
    css_source = graph.serialize(CSS, **kwargs)
    canvas_source = graph.serialize(CANVAS, **kwargs)
    indented_css = css_source.replace("\n", "\n\t\t")
    indented_canvas = canvas_source.replace("\n", "\n\t")
    return template % (indented_css, indented_canvas)


# Create a graph:
g = Graph()
g.add_node("cat")
g.add_node("dog")
g.add_edge("cat", "dog")

# To make this work as a cgi-bin script, uncomment the following lines:
##!/usr/bin/env python
#import cgi
#import cgitb; cgitb.enable() # Debug mode.
#print "Content-type: text/html"

print webpage(g, width=500, height=500)

Example #15

0

Show file

from pattern.graph import Graph
import webbrowser

g = Graph()
n1, n2, n3, n4, n5 = "asdasd", "two", "three", "four", "five"

for node_id in (n1, n2, n3, n4, n5):
    g.add_node(node_id)

# Chain n2 - n3 - n4 - n5; n1 is added above but gets no edge.
for left, right in ((n2, n3), (n3, n4), (n4, n5)):
    g.add_edge(left, right)
"""for n1, n2 in (
   ('cat', 'tail'), ('cat', 'purr'), ('purr', 'sound'),
   ('dog', 'tail'), ('dog', 'bark'), ('bark', 'sound')):
     g.add_node(n1)
     g.add_node(n2)
     g.add_edge(n1, n2, weight=0.0, type='is-related-to')"""

g.export('sound')

# Open the page at this fixed local path in the default browser.
exported_page = u"file:///Users/tobiasfuma/Desktop/FirmenbuchCrawler/sound/index.html"
webbrowser.open(exported_page)

Example #16

0

Show file

File: example_using_pattern.py Project: carriercomm/scraperwiki-scraper-vault

# Example of pattern: http://www.clips.ua.ac.be/pages/pattern

from pattern.web    import Bing, plaintext
from pattern.en     import Sentence, Chunk, parse
from pattern.search import Pattern
from pattern.graph  import Graph, Node, Edge, export
 
# Build a graph of "X more important than Y" statements found through Bing.
g = Graph()
for i in range(1):
    print "--------------", i
    for r in Bing().search('"more important than"', start=i+1, count=50):
        # Lower-cased result description with markup stripped by plaintext().
        s = plaintext(r.description.lower())
        print s
        s = Sentence(parse(s))
        print s    
        p = Pattern.fromstring('NP (VP) more important than NP')
        for m in p.search(s):
            a = m.constituents(p[+0])[-1] # Left NP.
            b = m.constituents(p[-1])[+0] # Right NP.
            # For a Chunk use its head (falling back to the chunk itself
            # when the head is falsy); anything else is used as-is.
            a = (isinstance(a, Chunk) and a.head or a).string
            b = (isinstance(b, Chunk) and b.head or b).string
            if a and b:
                if a not in g:
                    g.add_node(a, radius=5, stroke=(0,0,0,0.8))
                if b not in g:
                    g.add_node(b, radius=5, stroke=(0,0,0,0.8))
                # The edge runs from the right NP (b) to the left NP (a).
                g.add_edge(g[b], g[a], stroke=(0,0,0,0.6))

# NOTE(review): indexing [0] assumes at least one match was added -- confirm.
g = g.split()[0] # Largest subgraph.
 
for n in g.sorted()[:40]: # Sorted by Node.weight.

Example #17

0

Show file

 def remove(self, x):
     """ Removes x from the graph by delegating to Graph.remove().
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     Graph.remove(self, x)

Example #18

0

Show file

 def add_edge(self, id1, id2, *args, **kwargs):
     """ Returns a Relation between two concepts (Edge subclass).
         Relation is passed as "base" unless the caller supplies its own.
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     kwargs.setdefault("base", Relation)
     return Graph.add_edge(self, id1, id2, *args, **kwargs)

Example #19

0

Show file

 def add_node(self, id, *args, **kwargs):
     """ Returns a Concept (Node subclass).
         Concept is passed as "base" unless the caller supplies its own.
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     kwargs.setdefault("base", Concept)
     return Graph.add_node(self, id, *args, **kwargs)

Example #20

0

Show file

            }
            images = document.getElementsByTagName('img');
            for(var i = 0; i < images.length; i++) {
	            images[i].ondragstart = function() { return false; };
            }
    };
    </script>
""" % {
    "image": css_image,
    "user": css_user
}

# Create new Instagram API.
api = InstagramAPI(access_token=access_token, client_secret=client_secret)
# Create new Graph.
graph = Graph(distance=distance)


# Finds a user and that user's id.
# Takes a single username (string) as its argument and
#    returns a User object and its user-id (as a string).
# !! The exact username must be given; otherwise this function will return the wrong user!
def find_user(username):
    if not username:
        print "Name is empty!"
        return None, None
    res = api.user_search(q="@" + username, count=1)
    if not res:
        print "{user} cannot be found!".format(user=username)
        return None, None
    ret_user = res[0]

Example #21

0

Show file

import os, sys
sys.path.insert(0, os.path.join("..", ".."))

from pattern.graph import Graph
from pattern.graph import export, WEIGHT, CENTRALITY
from random import choice, random

# This example demonstrates how a graph visualization can be exported to HTML,
# using the HTML5 <canvas> tag and Javascript.
# All properties (e.g. stroke color) of nodes and edges are ported.

g = Graph()
BLACK = (0, 0, 0, 1)

# Random nodes: ids "1" through "50".
for n in range(1, 51):
    g.add_node(id=str(n), radius=5, stroke=BLACK, text=BLACK)

# Random edges: 75 picks of two independently chosen endpoints.
for _ in range(75):
    endpoint_a = choice(g.nodes)
    endpoint_b = choice(g.nodes)
    g.add_edge(endpoint_a, endpoint_b, length=1.0, weight=random(), stroke=BLACK)

# More blue = more important.
for important in g.sorted()[:20]:
    alpha = 0.8 * important.weight
    important.fill = (0.6, 0.8, 1.0, alpha)

# This node's label is different from its id.
# We'll make it a hyperlink, see the href attribute at the bottom.
home = g["1"]
home.text.string = "home"

# The export() command generates a folder with an index.html,
# that displays the graph using an interactive, force-based spring layout.

Example #22

0

Show file

File: visualization.py Project: riffschelder/reviewshub

def compare_visualization(product_sku, compare_phrase):
    all_reviews = ReviewInfo.objects.all().filter(sku=product_sku)
    g = Graph()

    count = 0.0
    for e in all_reviews :
        s = e.comment.lower() 
        s = plaintext(s)
        s = parsetree(s)
        #p = '{NP} (VP) faster than {NP}'
        p = '{NP} (VP) ' + compare_phrase + ' {NP}'
        for m in search(p, s):
            x = m.group(1).string # NP left
            y = m.group(2).string # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0,0,0,0.75)) # R,G,B,A
        count += 1.0
        print count/len(all_reviews), '\r'

    if len(g) > 0: 
        g = g.split()[0] # Largest subgraph.
        for n in g.sorted()[:80]: # Sort by Node.weight.
            n.fill = (0, 0.5, 1, 0.75 * n.weight)

        g.export('static/compare_visualization', directed=True, weighted=2.0)
        return True
    else: 
        return False

Example #23

0

Show file

File: commonsense.py Project: kracekumar/pattern

 def add_node(self, id, *args, **kwargs):
     """ Returns a Concept (Node subclass).
         Concept is passed as "base" unless the caller supplies its own.
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     kwargs.setdefault("base", Concept)
     return Graph.add_node(self, id, *args, **kwargs)

Example #24

0

Show file

File: bing_graph.py Project: nikisix/mm_patterns

from pattern.web    import Bing, plaintext
from pattern.en     import parsetree
from pattern.search import search
from pattern.graph  import Graph
 
# Build a graph of "X is less important than Y" statements found through Bing.
# (Swap in '"more important than"' and
#  '{NP} (VP) more important than {NP}' for the opposite comparison.)
g = Graph()
p = '{NP} (VP) is less important than {NP}'
for i in range(10):
    for result in Bing().search('"is less important than"', start=i+1,
                                count=50):
        s = result.text.lower() 
        s = plaintext(s)
        s = parsetree(s)
        for m in search(p, s):
            x = m.group(1).string # NP left
            y = m.group(2).string # NP right
            if x not in g:
                g.add_node(x)
            if y not in g:
                g.add_node(y)
            g.add_edge(g[x], g[y], stroke=(0,0,0,0.75)) # R,G,B,A
 
# Without any match the graph is empty and g.split()[0] would raise
# IndexError, so only post-process and export a non-empty graph.
if len(g) > 0:
    g = g.split()[0] # Largest subgraph.

    for n in g.sorted()[:40]: # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('test', directed=True, weighted=0.6)
else:
    print("No matches found; nothing to export.")

Example #25

0

Show file

File: commonsense.py Project: kracekumar/pattern

 def add_edge(self, id1, id2, *args, **kwargs):
     """ Returns a Relation between two concepts (Edge subclass).
         Relation is passed as "base" unless the caller supplies its own.
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     kwargs.setdefault("base", Relation)
     return Graph.add_edge(self, id1, id2, *args, **kwargs)

Example #26

0

Show file

File: 07-graphml.py Project: ADA110/Cibus

import os, sys
sys.path.insert(0, os.path.join("..", ".."))

from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT
from random import choice, random

# This example demonstrates how a graph visualization can be exported to GraphML,
# a file format that can be opened in Gephi (https://gephi.org).

g = Graph()
# Random nodes.
for i in range(50):
    g.add_node(i)
# Random edges.
for i in range(75):
    node1 = choice(g.nodes)
    node2 = choice(g.nodes)
    g.add_edge(node1, node2, weight=random())

g.prune(0)

# This node's label is different from its id.
# The edges are random, so node 1 may have been pruned above;
# only relabel it when it is still part of the graph.
if 1 in g:
    g[1].text.string = "home"

# By default, Graph.export() exports to HTML,
# but if we give it a filename that ends in .graphml it will export to GraphML.
g.export(os.path.join(os.path.dirname(__file__), "test.graphml"))

Example #27

0

Show file

File: commonsense.py Project: kracekumar/pattern

 def remove(self, x):
     """ Removes x from the graph by delegating to Graph.remove().
     """
     # Reset _properties before the change (presumably a derived cache -- confirm).
     self._properties = None
     Graph.remove(self, x)

Example #28

0

Show file

from __future__ import print_function
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.graph import Graph, CENTRALITY

# A graph is a network of nodes (or concepts)
# connected to each other with edges (or links).

g = Graph()
for name in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(name)

# (source, target) pairs, added in this order.
for source, target in (
        ("tree", "nest"),    # Trees have bird nests.
        ("nest", "bird"),    # Birds live in nests.
        ("bird", "fly"),     # Birds eat flies.
        ("ant", "bird"),     # Birds eat ants.
        ("fly", "insect"),   # Flies are insects.
        ("insect", "ant"),   # Ants are insects.
        ("ant", "tree")):    # Ants crawl on trees.
    g.add_edge(source, target)

# From tree => fly: tree => ant => bird => fly
for start, goal in (("tree", "fly"), ("nest", "ant")):
    print(g.shortest_path(g.node(start), g.node(goal)))
print()

# Which nodes get the most traffic?
by_centrality = sorted(g.nodes, key=lambda node: node.centrality, reverse=True)
for n in by_centrality:
    print('%.2f' % n.centrality, n)

Example #29

0

Show file

File: Python_Data_Science_Pattern_Graph.py Project: VakinduPhilliam/Python_Data_Science

# statistical algorithms.
# Pattern is a web mining module for the Python programming language.
# It has tools for data mining (Google, Twitter and Wikipedia API, a web crawler, a HTML DOM parser), natural
# language processing (part-of-speech taggers, n-gram search, sentiment analysis, WordNet), machine learning
# (vector space model, clustering, SVM), network analysis and <canvas> visualization.
# pattern.graph
# The pattern.graph module provides a graph data structure that represents relations between nodes (e.g., terms, concepts).
# Graphs can be exported as HTML <canvas> animations (demo). In the example below, more central nodes
#  (= more incoming traffic) are colored in blue.

from pattern.web    import Bing, plaintext
from pattern.en     import parsetree
from pattern.search import search
from pattern.graph  import Graph

g = Graph()

    for i in range(10):

        for result in Bing().search('"more important than"', start=i+1, count=50):

            s = r.text.lower() 
            s = plaintext(s)
            s = parsetree(s)
            p = '{NP} (VP) more important than {NP}'

            for m in search(p, s):
                x = m.group(1).string # NP left
                y = m.group(2).string # NP right

                if x not in g:

Example #30

0

Show file

File: 01-graph.py Project: Dirklectisch/cityment

import os, sys; sys.path.insert(0, os.path.join("..", "..", ".."))

from pattern.graph import Graph, CENTRALITY

# Simple Graph demonstration.

g = Graph()
for n in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(n)
    
g.add_edge("tree", "nest")
g.add_edge("nest", "bird")
g.add_edge("bird", "fly")
g.add_edge("fly", "insect")
g.add_edge("insect", "ant")
g.add_edge("ant", "tree")
g.add_edge("ant", "bird")

print g.shortest_path(g.node("tree"), g.node("fly"))
print g.shortest_path(g.node("nest"), g.node("ant"))
print

# Which nodes get the most traffic?
print g.sorted(order=CENTRALITY)

Example #31

0

Show file

File: 03-template.py Project: AnthonyNystrom/pattern

</html>
'''.strip()

def webpage(graph, head="", style="", body=("",""), **kwargs):
    """ Returns the HTML page for the given graph, built from the template.
        Custom HTML can be inserted with head, style and body
        (body is a pair: HTML before and HTML after the canvas).
        All other keyword arguments are passed on to render().
    """
    def indent_lines(s, t="\t"):
        # Indent every line after the first one.
        return s.replace("\n", "\n" + t)

    def indent_first(s, t="\t"):
        # Put s on its own indented line; an empty s yields an empty string.
        return ("\n%s%s" % (t, s.lstrip())).rstrip()

    css = render(graph, type=STYLE, **kwargs)
    canvas = render(graph, type=CANVAS, **kwargs)
    body_before, body_after = body[0], body[1]
    return template % (
        indent_first(head),
        indent_lines(css),
        indent_first(style, "\t\t"),
        indent_lines(body_before),
        indent_lines("\n" + canvas),
        indent_first(body_after))

# Create a graph:
g = Graph()
g.add_node("cat")
g.add_node("dog")
g.add_edge("cat", "dog")

# To make this work as a cgi-bin script, uncomment the following lines:
##!/usr/bin/env python
#import cgi
#import cgitb; cgitb.enable() # Debug mode.
#print "Content-type: text/html"

print webpage(g, width=300, height=300)

Example #32

0

Show file

File: 07-graphml.py Project: DataBranner/pattern

import os
import sys
sys.path.insert(0, os.path.join("..", ".."))

from pattern.graph import Graph, WEIGHT, CENTRALITY, DEGREE, DEFAULT
from random import choice, random

# This example demonstrates how a graph visualization can be exported to GraphML,
# a file format that can be opened in Gephi (https://gephi.org).

g = Graph()
# Fifty nodes with the integer ids 0 .. 49.
for node_id in range(50):
    g.add_node(node_id)
# Seventy-five randomly weighted edges between randomly picked nodes.
for _ in range(75):
    g.add_edge(choice(g.nodes), choice(g.nodes), weight=random())

g.prune(0)

# This node's label is different from its id.
# FIXME this fails if the 1 has been pruned
# g[1].text.string = "home"

# By default, Graph.export() exports to HTML,
# but if we give it a filename that ends in .graphml it will export to GraphML.
graphml_path = os.path.join(os.path.dirname(__file__), "test.graphml")
g.export(graphml_path)

Example #33

0

Show file

File: perception.py Project: OAlm/the_stromberg_stories

# relations (e.g., "is-a", "is-part-of", "is-property-of", ...)

# The data was collected manually and consists of about 10,000
# triples (concept1 -> relation -> concept2).
# The visual tool for adding new triples is online at:
# http://nodebox.net/perception

# The data is bundled in Pattern as a .csv file.

from pattern.graph import MODULE # path to pattern/graph/commonsense.csv
data = pd(MODULE, "commonsense.csv")
data = Datasheet.load(data)

# Create the graph:

g = Graph()
for concept1, relation, concept2, context, weight in data:
    g.add_node(concept1)
    g.add_node(concept2)
    g.add_edge(concept1, concept2, type=relation, weight=min(int(weight) * 0.1, 1.0))

# ------------------------------------------------------------------------------------

# The halo of a node is a semantical representation of a concept.
# The halo is made up of other concepts directly or indirectly related to this concept,
# defining it. 

# For example:
#
# - Darth Vader is-a Sith
# - Darth Vader is-part-of Death Star

Example #34

0

Show file

File: ConceptGraph.py Project: Darth-Neo/nl_lib

class PatternGraph(ConceptGraph):
    """ConceptGraph that stores its nodes and edges in a Graph (self.g)
    and exports the result below ``<homeDir>/html``.
    """
    g = None

    def __init__(self, homeDir=None):
        """Create the ``html`` output directory and an empty Graph.

        homeDir -- parent directory of the ``html`` folder; the current
                   working directory is used when it is None.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as PatternGraph itself is subclassed.
        super(PatternGraph, self).__init__()

        if homeDir is None:
            homeDir = os.getcwd()

        self.homeDir = homeDir + os.sep + u"html"

        if not os.path.exists(self.homeDir):
            os.makedirs(self.homeDir)

        self.g = Graph()

    def addNode(self, n):
        """Add a node identified by n.name."""
        self.g.add_node(n.name)

    def addEdge(self, p, c):
        """Add an edge from p.name to c.name."""
        self.g.add_edge(p.name, c.name, stroke=(0, 0, 0, 0.75))  # R,G,B,A

    def exportGraph(self, title=u"Pattern Graph"):
        """Export up to five subgraphs from subGraph() to ``graph0`` .. ``graph4``
        below self.homeDir, overwriting earlier exports.
        """
        logger.debug(u"exportGraph")

        logger.info(u"Graph Size: %d" % len(self.g))

        k = self.subGraph()

        # Iterate through a list of unconnected subgraphs (at most five).
        for i, h in enumerate(k[:5]):
            logger.debug(u"Graph[%d]=%d" % (i, len(h)))
            newDir = self.homeDir + os.sep + u"graph" + str(i)
            h.export(newDir, overwrite=True, directed=True, weighted=0.5, title=title)

    def subGraph(self):
        """Return the subgraphs of a reduced copy of the largest subgraph.

        The (up to) 30 first nodes of the largest subgraph's sorted() order
        are copied into a new Graph together with their direct neighbours
        and connecting edges. Returns an empty list for an empty graph.
        """
        parts = self.g.split()
        if not parts:
            # Nothing was added yet; indexing [0] would raise IndexError.
            return []

        # Take the largest subgraph.
        h = parts[0]

        # Sort by Node.weight.
        newGraph = Graph()
        for i, n in enumerate(h.sorted()[:30], 1):
            n.fill = (0, 0.5, 1, 0.75 * n.weight)
            logger.debug(u"i:%d=%s" % (i, n))
            newGraph.add_node(n.id)
            logger.debug(u"edges : %s" % n.edges)

            for e in n.edges:
                logger.debug(u"edge1 : %s, edge2 : %s" % (e.node1.id, e.node2.id))
                # Whichever end of the edge is not n is the neighbour.
                if e.node1.id == n.id:
                    newGraph.add_node(e.node2.id)
                else:
                    newGraph.add_node(e.node1.id)
                newGraph.add_edge(e.node1.id, e.node2.id, stroke=(0, 0, 0, 0.75))

        return newGraph.split()

Example #35

0

Show file

File: crawler.py Project: Jack53416/WebMining

class WebCrawler():
    def __init__(self, args, depth=1):
        self.links = [WebPage(x) for x in args.url]
        self.depth = depth
        self.historyDb = WebsiteDatabase()
        self.done = False
        self.options = args
        self.results = {link.url.domain: Result() for link in self.links}

        self.cloudIndexer = CloudSearchIndexer.forDomainIndex("websites")

        if args.graph or args.rank:
            self.webGraph = Graph(distance=30.0)
            for link in self.links:
                self.webGraph.add_node(link.url.domain,
                                       radius=15,
                                       fill=(1, 0, 0, 0.5))

    def __del__(self):
        self.cloudIndexer._commitToAmazon()

    def crawl(self):
        if len(self.links) < 1:
            self.done = True
            self.finish()
            return

        site = self.links.pop(0)

        if self.historyDb.wasPageVisited(site):
            print 'reading data'
            site = self.historyDb.readWebPage(site.url.string,
                                              isExternal=site.isExternal,
                                              depth=site.depth)
        else:
            print 'downloading'
            try:
                site.downloadContent()
            except HTTP404NotFound:
                return self.fail(site, "404 not found")
            except URLTimeout:
                return self.fail(site, "Timeout error")
            except URLError as err:
                return self.fail(site, str(err))

        connected = True
        if site.depth == self.depth:
            connected = False
        self.historyDb.insertWebpage(site, connection=connected)
        self.historyDb.appendSession(site)

        for link in site.getLinks():
            if self.isValidForQueue(link):
                if link.isExternal and (self.options.graph
                                        or self.options.rank):
                    self.addDomainNode(link)
                    if site.depth < self.depth:
                        self.links.append(link)
                elif not link.isExternal and site.depth < self.depth:
                    self.links.insert(0, link)

        if not self.historyDb.wasPageVisited(site):
            self.visit(site)
        site.cleanCashedData()

    def isValidForQueue(self, link):
        if link not in self.links and not link.url.anchor:
            if self.historyDb.isInThisSession(link):
                self.historyDb.insertRelation(link.parent, link)
            else:
                return True
        return False

    def addDomainNode(self, page):
        match = re.search("\.", page.url.domain)
        if not match:
            return
        if page.parent.url.domain == page.url.domain:
            return
        if self.webGraph.node(page.url.domain) is None:
            self.webGraph.add_node(page.url.domain, radius=15)
        if self.webGraph.edge(page.parent.url.domain, page.url.domain) is None:
            self.webGraph.add_edge(page.parent.url.domain,
                                   page.url.domain,
                                   weight=0.0,
                                   type='is-related-to')

    def visit(self, page):
        print 'visited: ', page.url.string, ' domain: ', page.url.domain, 'graph', self.options.graph
        self.cloudIndexer.addDocument(page)

        if page.isExternal and self.options.graph and page.url.domain not in self.results.keys(
        ):
            self.webGraph.node(page.url.domain).fill = (0, 1, 0, 0.5)
        try:
            if self.options.text:
                self.results[page.url.domain].wordStats += page.countWords()
            if self.options.a:
                links = [link.url.string for link in page.getLinks()]
                self.results[page.url.domain].links.update(links)
            if self.options.image:
                self.results[page.url.domain].images.update(page.getImages())
            if self.options.script:
                self.results[page.url.domain].scripts.update(page.getScripts())
        except Exception as e:
            print "Error parsing document: ", type(e).__name__ + ': ' + str(e)

    def fail(self, link, error):
        print 'failed:', link.url.string, 'err: ', error

    def finish(self):
        """Print all results and calculate cosine similarity between all provided ur;s"""
        self.historyDb.clearSession()
        with Emitter(self.options.console, self.options.file) as output:
            for key, value in self.results.iteritems():
                output.emitLine(key)
                value.emit(output)

            if len(self.results
                   ) > 1 and self.options.text and self.options.cos:
                combinations = [
                    list(x)
                    for x in itertools.combinations(self.results.keys(), 2)
                ]
                for pair in combinations:
                    cosValue = self.results[pair[0]].cosineSimilarity(
                        self.results[pair[1]])
                    output.emitLine(
                        u"cos similarity between:{0} and {1} = {2}".format(
                            pair[0], pair[1], cosValue))

            output.emitLine('')
            #output.emitLine("max depth: " + str(max(site.depth for site in self.history)))
            #output.emitLine("sites visited: " + str(len(self.history)))

            if self.options.graph:
                self.webGraph.eigenvector_centrality()
                self.webGraph.export('graph',
                                     directed=True,
                                     width=2200,
                                     height=1600,
                                     repulsion=10)
            if self.options.rank:
                ranks = self.calculatePageRank()
                output.emitLine('')
                output.emit(ranks)

    def calculatePageRank(self):
        adjMap = adjacency(self.webGraph, directed=True, stochastic=True)
        domains = adjMap.keys()
        M = np.zeros((len(domains), len(domains)))
        for idx, domain in enumerate(domains):
            connections = adjMap[domain].keys()
            for connection in connections:
                M[idx, domains.index(connection)] = adjMap[domain][connection]

        M = np.transpose(M)
        #M = np.array([[0,0,0,0,1], [0.5,0,0,0,0], [0.5,0,0,0,0], [0,1,0.5,0,0], [0,0,0.5,1,0]])
        #M = np.array([[0,  0.5, 0],[0.5,0.5, 0],  [0.5, 0,  0]])
        pageScores = self.executeComputations(M)
        print pageScores
        ranks = dict(zip(domains, pageScores))
        ranks = sorted(ranks.items(), key=operator.itemgetter(1))
        return ranks

    def executeComputations(self, M):
        damping = 0.80
        error = 0.0000001
        N = M.shape[0]
        v = np.ones(N)
        v = v / np.linalg.norm(v, 1)
        last_v = np.full(N, np.finfo(float).max)
        for i in range(0, N):
            if sum(M[:, i]) == 0:
                M[:, i] = np.full(N, 1.0 / N)

        M_hat = np.multiply(M, damping) + np.full((N, N), (1 - damping) / N)
        while np.linalg.norm(v - last_v) > error:
            last_v = v
            v = np.matmul(M_hat, v)

        return np.round(v, 6)

Example #36

0

Show file

print
# - 43, Doctor Who (seems to be linked to almost every trope?)
# - 34, Buffy the Vampire Slayer
# - 20, The X-Files
# - ...

# ------------------------------------------------------------------------------------

# Another approach is to create a network of movies linked by tropes (or vice versa).
# A network can be represented as a graph with nodes (= things)
# and edges (connections between things).
# http://www.clips.ua.ac.be/pages/pattern-graph

from pattern.graph import Graph

# Build the network: one node per movie, one node per trope, and one edge
# for every (movie, trope) pair found in `movies`.
# NOTE(review): `movies` is defined outside this excerpt; it is iterated here
# as a mapping of movie -> iterable of tropes.
g = Graph()
for movie, tropes in movies.items():
    g.add_node(movie)
    for trope in tropes:
        g.add_node(trope)
        g.add_edge(movie, trope)  # connection between movie <=> trope

# What nodes directly connect to a given trope?
# The node is looked up by id and its `links` are printed one per line.
for node in g["Teach Him Anger"].links:
    print node

# What is the shortest path between two nodes in the network?
print  # Python 2 print statement: emits an empty separator line.
print g.shortest_path("Cinderella", "Alien")

# Cinderella => Race Against the Clock => The X-Files => Absurdly Spacious Sewer => Alien

Example #37

0

Show file

def get_pattern_data(search_param):
    """Mine tweets for "<NP> ... <search_param> <NP>" relations and export them.

    Prints the text of the tweets returned for ``search_param``, then scans
    ten further result pages for matches of the pattern
    ``{NP} (VP) <search_param> {NP}``.  Every match contributes an edge
    between its two captured noun phrases.  The resulting graph is exported
    to the ``data`` folder.

    search_param -- the search term; it is also spliced verbatim into the
                    search pattern.
    """
    twitter = Twitter(language='en')

    # Echo the cached results, dropping characters that are not ASCII.
    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    # The pattern does not change between tweets, so build it once.
    p = '{NP} (VP) ' + search_param + ' {NP}'

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = parsetree(plaintext(result.text.lower()))
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                # Register each endpoint independently, then always link
                # them.  Nesting these steps under "x not in g" dropped every
                # relation whose left phrase was already in the graph.
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    #if len(g)>0:
    #   g = g.split()[0] # Largest subgraph.

    for n in g.sorted()[:40]:  # Sort by Node.weight.
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('data', directed=False, weighted=0.6)

Example #38

0

Show file

File: log_stat_graph.py Project: sangheestyle/analyze_repo

 def __init__(self, name=None):
     """Create an empty graph holder labelled with the optional ``name``."""
     # Store the caller's name; assigning None here discarded the argument.
     self.name = name
     self.graph = Graph()

Example #39

0

Show file

import os, sys
sys.path.append(os.path.join("..", "..", ".."))

from pattern.graph import Graph, CENTRALITY

# Simple Graph demonstration.

# Create the graph and register the six nodes by id.
g = Graph()
for n in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(n)

# Connect the nodes; each call links the two given node ids.
g.add_edge("tree", "nest")
g.add_edge("nest", "bird")
g.add_edge("bird", "fly")
g.add_edge("fly", "insect")
g.add_edge("insect", "ant")
g.add_edge("ant", "tree")
g.add_edge("ant", "bird")

# Print the shortest path between two pairs of nodes, looked up by id.
# (Python 2 print statements; the bare `print` emits an empty line.)
print g.shortest_path(g.node("tree"), g.node("fly"))
print g.shortest_path(g.node("nest"), g.node("ant"))
print

# Which nodes get the most traffic?
# The nodes are printed in the order given by the CENTRALITY sort.
print g.sorted(order=CENTRALITY)

Example #40

0

Show file

File: 01-graph.py Project: clips/pattern

from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.graph import Graph, CENTRALITY

# A graph is a network of nodes (or concepts)
# connected to each other with edges (or links).

# Create the graph and register the six nodes by id.
g = Graph()
for n in ("tree", "nest", "bird", "fly", "insect", "ant"):
    g.add_node(n)

# Connect the nodes; the trailing comments give the relation each edge
# stands for.
g.add_edge("tree", "nest")  # Trees have bird nests.
g.add_edge("nest", "bird")  # Birds live in nests.
g.add_edge("bird", "fly")   # Birds eat flies.
g.add_edge("ant", "bird")   # Birds eat ants.
g.add_edge("fly", "insect") # Flies are insects.
g.add_edge("insect", "ant") # Ants are insects.
g.add_edge("ant", "tree")   # Ants crawl on trees.

# Print the shortest path between two pairs of nodes, looked up by id.
# From tree => fly: tree => ant => bird => fly
print(g.shortest_path(g.node("tree"), g.node("fly")))
print(g.shortest_path(g.node("nest"), g.node("ant")))
print()

Example #41

0

Show file

File: Semantic net.py Project: Hossain-Shah/Project

from pattern.graph import Graph
# Build a small semantic network around "doll"; each edge carries a `type`
# label naming the relation. No add_node() calls are made here, only
# add_edge().
g = Graph()
g.add_edge('doll', 'toy', type='is-a')
g.add_edge('silent', 'doll', type='is-property-of')
g.add_edge('doll', 'girl', type='is-related-to')
# Look the node up by id, then print its id and its links.
node = g['doll']
print(node.id)
print(node.links)