Beispiel #1
0
def import_api_data2():
    """Load 'obtaj' records from the registry API into the Neo4j graph.

    Each obtaj record is merged into an ``Obtaj`` node by id; its owners
    ("borjnuku") and properties are merged as adjacent nodes and linked
    with unique relationships.
    """
    authenticate("localhost:7474", "neo4j", "1111")
    graph = Graph()
    # graph.delete_all()

    # Uncomment on the first run!
    # graph.schema.create_uniqueness_constraint("Borjnuk", "id")
    # graph.schema.create_uniqueness_constraint("Obtaj", "id")
    # graph.schema.create_uniqueness_constraint("Property", "id")

    for obtaj_record in get_objects_art('obtaj'):
        obtaj_node = graph.merge_one("Obtaj", "id", obtaj_record["id"])
        obtaj_node["reason_doc"] = obtaj_record["reason_doc"]
        obtaj_node["cost_size"] = obtaj_record["cost_size"]

        for owner_record in obtaj_record["borjnuku"]:
            owner_node = graph.merge_one("Borjnuk", "id", owner_record["id"])
            owner_node["name"] = owner_record["name"]
            owner_node["tel_number"] = owner_record["tel_number"]
            owner_node.push()
            graph.create_unique(Relationship(owner_node, "obtajuetsa", obtaj_node))

        for prop_record in obtaj_record["properties"]:
            prop_node = graph.merge_one("Property", "id", prop_record["id"])
            prop_node["name"] = prop_record["name_property"]
            prop_node["ser_number"] = prop_record["ser_number"]
            prop_node.push()
            graph.create_unique(Relationship(prop_node, "zakladena", obtaj_node))

        obtaj_node.push()
def import_api_data():
    """Import methods (with their authors and category) from my register
    into the graph DB.

    Merges a ``Method`` node per record, ``Author`` nodes with WROTE
    relationships, and a ``Category`` node with a CONTAINS relationship.
    """
    graph = Graph()
    # Uncomment on the first run!
    # graph.schema.create_uniqueness_constraint("Method", "id")
    # graph.schema.create_uniqueness_constraint("Author", "id")
    # graph.schema.create_uniqueness_constraint("Category", "id")

    for method_record in get_objects('method'):
        method_node = graph.merge_one("Method", "id", method_record["id"])
        method_node["name"] = method_record["name"]
        method_node["creation_date"] = method_record["creation_date"]
        method_node["approval_date"] = method_record["approval_date"]

        for author_record in method_record["authors"]:
            author_node = graph.merge_one("Author", "id", author_record["id"])
            author_node["name"] = author_record["name"]
            author_node.push()
            graph.create_unique(Relationship(author_node, "WROTE", method_node))

        category_record = method_record["category"]
        category_node = graph.merge_one("Category", "id", category_record["id"])
        category_node["name"] = category_record["name"]
        category_node.push()
        graph.create_unique(Relationship(category_node, "CONTAINS", method_node))
        method_node.push()
def import_api2_data():
    """
    Imports data from the second register (experts and the adjacent
    documents, commission orders, expertises and legal issues) into the
    graph DB.
    """
    graph = Graph()
    # Uncomment on first run!
    # graph.schema.create_uniqueness_constraint("Expert", "id")
    # graph.schema.create_uniqueness_constraint("Document", "id")
    # graph.schema.create_uniqueness_constraint("Comission_order", "id")
    # graph.schema.create_uniqueness_constraint("Legal_issue", "id")
    # graph.schema.create_uniqueness_constraint("Expertise", "id")

    experts = get_objects2("experts")

    for api_expert in experts:
        node_expert = graph.merge_one("Expert", "id", api_expert["id"])
        node_expert["name"] = api_expert["name"]
        node_expert["workplace"] = api_expert["workplace"]
        node_expert["address"] = api_expert["address"]
        node_expert["phone"] = api_expert["phone"]

        for api_document in api_expert["documents"]:
            node_document = graph.merge_one("Document", "id", api_document["id"])
            node_document["id_doc"] = api_document["id_doc"]
            node_document["release_date"] = api_document["release_date"]
            node_document["expiry_date"] = api_document["expiry_date"]
            node_document["document_type"] = api_document["document_type"]
            node_document.push()
            graph.create_unique(Relationship(node_expert, "SIGNED", node_document))

        for api_order in api_expert["commission_orders"]:
            node_order = graph.merge_one("Comission_order", "id", api_order["id"])
            node_order["commission_name"] = api_order["commission_name"]
            node_order["order_number"] = api_order["order_number"]
            node_order["order_date"] = api_order["order_date"]
            node_order.push()
            graph.create_unique(Relationship(node_order, "APPOINTED", node_expert))

            for api_expertise in api_order["expertises"]:
                # BUG FIX: expertise nodes were merged under the "Category"
                # label (copy-paste from the methods importer) although the
                # uniqueness constraint above is declared for "Expertise",
                # and the name was self-assigned from the node instead of
                # read from the API record.
                node_expertise = graph.merge_one("Expertise", "id", api_expertise["id"])
                node_expertise["name"] = api_expertise["name"]
                node_expertise.push()
                graph.create_unique(Relationship(node_order, "INCLUDES", node_expertise))

        for api_issue in api_expert["legal_issues"]:
            node_issue = graph.merge_one("Legal_issue", "id", api_issue["id"])
            node_issue["description"] = api_issue["description"]
            node_issue["date"] = api_issue["date"]
            node_issue.push()
            graph.create_unique(Relationship(node_expert, "WORKED_ON", node_issue))

        node_expert.push()
    # NOTE(review): this helper is defined inside import_api2_data() and is
    # never called; it also looks truncated -- `u`, `e` and the merged
    # "User" node are never pushed or related. Presumably pasted from a
    # tweet-import script; confirm before relying on it.
    def upload_tweets(users):
        # assumes each element of `users` is a tweet-like dict with
        # 'user', 'password', 'id' and 'text' keys -- TODO confirm
        graph = Graph()

        for t in users:
            u = t['user']
            e = t['password']  # unused -- left as in the original

            # NOTE(review): rebinding `users` here shadows the parameter,
            # and the lowercase "user" label differs from "User" below.
            users = graph.merge_one("user","id", t['id'])
            users.properties['text']= t['text']

            user = graph.merge_one("User","username", u["screen_name"])
Beispiel #5
0
def moveConceptMap():
    """Copy the numeracy concept map from the production EkStep API into
    the local Neo4j graph.

    Every concept becomes a ``Concept`` node merged by identifier; each
    entry of its ``children`` list becomes a relationship whose type is
    taken from the entry's ``relation`` field.
    """
    # neo4j graph connector
    graph = Graph()
    # load concept map from production

    import requests

    url = "https://api.ekstep.in/learning/v2/domains/numeracy/concepts"

    payload = "-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"file\"\r\n\r\n\r\n-----011000010111000001101001--"
    headers = {
        'content-type': "multipart/form-data; boundary=---011000010111000001101001",
        'user-id': "rayuluv",
        'cache-control': "no-cache",
        'postman-token': "96bc4304-3f9b-6de5-6143-4c14507fe0a5"
        }

    resp = requests.request("GET", url, data=payload, headers=headers).json()

    # move all concepts
    # (dict.has_key() was removed in Python 3; `in` behaves identically
    # on Python 2 as well)
    conceptList = resp["result"]["concepts"]
    for conceptDict in conceptList:
        # skip records without an identifier -- nothing to merge on
        if 'identifier' not in conceptDict:
            continue

        identifier = conceptDict['identifier']
        # create/find node
        node = graph.merge_one("Concept", "id", identifier)

        if 'subject' in conceptDict:
            node.properties["subject"] = conceptDict['subject']
            node.push()

        if 'objectType' in conceptDict:
            node.properties["objectType"] = conceptDict['objectType']
            node.push()

        if 'children' in conceptDict:
            for relationDict in conceptDict['children']:
                if 'identifier' not in relationDict:
                    continue
                if 'relation' not in relationDict:
                    continue
                node1 = graph.merge_one("Concept", "id", relationDict['identifier'])
                relationType = relationDict['relation']
                graph.create(Relationship(node, relationType, node1))
Beispiel #6
0
class VbplPipeline(object):
    """Scrapy pipeline that stores crawled legal documents in Neo4j."""

    # string fields copied verbatim from each scraped record (missing
    # keys default to ''), in the same order the original assigned them
    DOCUMENT_FIELDS = ('content', 'title', 'official_number',
                       'legislation_type', 'source', 'department',
                       'issuing_office', 'effective_area', 'effective_date',
                       'gazette_date', 'field', 'signer_title', 'signer_name')
    HISTORY_FIELDS = ('title', 'date', 'status', 'original_document',
                      'ineffective_part')
    RELATED_FIELDS = ('title', 'relating_type')

    def __init__(self):
        authenticate("localhost:7474", "neo4j", "123456")
        self.graph = Graph()

    def process_item(self, item, spider):
        """Merge the document, its histories and its related documents
        into the graph and connect them with HAS relationships."""
        document = item['document']

        # Create document node
        document_node = self.graph.merge_one(
            "LegalNormativeDocument", "id", document['document_id'])
        for key in self.DOCUMENT_FIELDS:
            document_node.properties[key] = document.get(key, '')
        document_node.push()

        for history in item['histories']:
            history_node = self.graph.merge_one("History", "id", history['history_id'])
            for key in self.HISTORY_FIELDS:
                history_node.properties[key] = history.get(key, '')
            history_node.push()
            # Add 'HAS' relationship
            self.graph.create_unique(Relationship(document_node, "HAS", history_node))

        for related_document in item['related_documents']:
            related_document_node = self.graph.merge_one(
                "RelatedDocument", "id", related_document['related_document_id'])
            for key in self.RELATED_FIELDS:
                related_document_node.properties[key] = related_document.get(key, '')
            related_document_node.push()
            # Add "HAS" relationship
            self.graph.create_unique(Relationship(document_node, "HAS", related_document_node))

        return item
Beispiel #7
0
def moveContentSummaryTable():
    """Copy learner/content interaction summaries from Cassandra into Neo4j.

    For each distinct learner_id a ``Learner`` node is merged, one
    learnercontentsummary row is read, and an INTERACTED_WITH relationship
    carrying timeSpent and ipm is created to the ``Content`` node.
    """
    graph = Graph()

    lids = session.execute("SELECT DISTINCT learner_id from learnercontentsummary")
    for lid in lids:
        uid = lid['learner_id']
        node = graph.merge_one("Learner","id",uid)

        # NOTE(review): the CQL is built by string concatenation; uid comes
        # from the same table, but a parameterised query would be safer --
        # confirm. Only the first row per learner is used ([0]); any further
        # content rows for the same learner are ignored -- presumably
        # intended, verify against the schema.
        contentDict = session.execute("SELECT * from learnercontentsummary WHERE learner_id='" + uid + "'")[0]
        cid = contentDict['content_id']
        tsp = contentDict['time_spent']
        ipm = contentDict['interactions_per_min']

        node2 = graph.merge_one("Content","id",cid)
        # add a relationship with property score
        graph.create(Relationship(node, "INTERACTED_WITH", node2,timeSpent=tsp,ipm=ipm))
def main1():
    """Demo: wipe the graph, create a banana node, then merge an apple
    node and push its properties."""
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    # start from an empty database
    graph.delete_all()

    graph.create(Node("Fruit", name="banana", colour="yellow", tasty=True))

    # merge_one only creates the node when it does not exist yet
    apple = graph.merge_one("Fruit", 'name', 'apple')
    apple['colour'] = 'green'
    apple['tasty'] = True
    apple.push()
Beispiel #9
0
def moveProficiencyTable():
    """Copy learner proficiency scores from Cassandra into Neo4j as
    ASSESSED_IN relationships between Learner and Concept nodes."""
    # get a list of all unique learners
    # neo4j graph connector
    graph = Graph()

    lids = session.execute("SELECT DISTINCT learner_id from learnerproficiency")
    for lid in lids:
        # get the knowledge state for this guy
        # <concept-id>,<socre> in schema

        uid = lid['learner_id']
        # create a learner node
        node = graph.merge_one("Learner","id",uid)

        profDict = session.execute("SELECT proficiency from learnerproficiency WHERE learner_id='" + uid + "'")[0]['proficiency']
        paramDict= session.execute("SELECT model_params from learnerproficiency WHERE learner_id='" + uid + "'")[0]['model_params']

        for cid, score in profDict.items():
            # create/find concept node
            node2 = graph.merge_one("Concept","id",cid)
            # NOTE(review): alpha/beta are pulled out of the model_params
            # string by fixed character offsets (chars 9-12 and 20); this
            # only works for one exact serialisation format -- confirm the
            # stored format before trusting these values.
            alpha=float(paramDict[cid][9:12])
            beta=paramDict[cid][20]
            # add a relationship with property score
            graph.create(Relationship(node, "ASSESSED_IN", node2,score=score,alpha=alpha,beta=beta))
Beispiel #10
0
def main2():
    """Demo: reset the graph, create fruit nodes, then two Person nodes
    joined by a KNOWS relationship."""
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    # start from an empty database
    graph.delete_all()

    graph.create(Node("Fruit", name="banana", colour="yellow", tasty=True))

    # merge_one only creates the node when it does not exist yet
    apple = graph.merge_one("Fruit", 'name', 'apple')
    apple['colour'] = 'green'
    apple['tasty'] = True
    apple.push()

    alice = Node("Person", name="Alice")
    bob = Node("Person", name="Bob")
    alice_knows_bob = Relationship(alice, "KNOWS", bob, since=1999)
    graph.create(alice)
    graph.create(bob)
    graph.create(alice_knows_bob)
# ##Spaghetti Bolognese
# (exported notebook cells: list one recipe's ingredients, then build a
# User node and start a "Likes" relationship for recommendations)

# In[15]:

# List every ingredient contained in the 'Spaghetti Bolognese' recipe.
graph_db.cypher.execute(
    "MATCH (REC1:Recipe{Name:'Spaghetti Bolognese'})-[r:Contains]->(ING:Ingredient) RETURN REC1.Name, ING.Name;"
)

# #Recommendation

# ##Add User

# In[4]:

# merge_one: creates the User node only if it does not exist yet
UserNode = graph_db.merge_one("User", "Name", "Ragnar")

# ##Add User likes

# In[5]:

UserRef = graph_db.find_one("User",
                            property_key="Name",
                            property_value="Ragnar")  #look for user Ragnar

# In[6]:

RecipeRef = graph_db.find_one(
    "Recipe", property_key="Name",
    property_value="Spaghetti Bolognese")  #look for recipe Spaghetti Bolognese
# NOTE(review): the next statement is truncated in this file -- the
# Relationship(...) call is missing its end node and closing parenthesis.
NodesRelationship = Relationship(UserRef, "Likes",
Beispiel #12
0
class StuffNeo4j():
    def __init__(self, nodelabel, reltype):
        self.graph_db = None
        self.nodelabel = nodelabel
        self.reltype = reltype

    def connect(self, uri, usr="******", pwd="neo4j"):
        if not uri.endswith('/'):
            uri += '/'
        authenticate(uri, usr, pwd)
        self.graph_db = Graph(uri + "db/data")

    def create_indexes(self):
        #If index is already created py2neo throws exception.
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(name)" %
                                         self.nodelabel)
        except:
            pass
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(synset_id)" %
                                         self.nodelabel)
        except:
            pass
        try:
            self.graph_db.cypher.execute(
                "CREATE INDEX ON :%s(pointer_symbol)" % self.reltype)
        except:
            pass

    def create_node(self, nodetype, **kwargs):
        return Node(nodetype, **kwargs)

    def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs):
        n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val)
        for k in kwargs:
            n.properties[k] = kwargs[k]
        n.push()
        return n

    def insert_rel(self, reltype, node1, node2, **kwargs):
        if node1 is not None and node2 is not None:
            rel = Relationship(node1, reltype, node2, **kwargs)
            self.graph_db.create(rel)
        else:
            print "Could not insert relation (%s) - [%s] -> (%s)" % (
                node1, reltype, node2)

    def merge_rel(self, reltype, node1, node2, **kwargs):
        if node1 is not None and node2 is not None:
            rel = Relationship(node1, reltype, node2, **kwargs)
            return self.graph_db.create_unique(rel)
        else:
            print "Could not merge relation (%s) - [%s] -> (%s)" % (
                node1, reltype, node2)

    def create_wordnet_rel(self, synset1, synset2, ptype):
        """
        Pointer symbols
        http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html
        
         The pointer_symbol s for nouns are:
        
            !    Antonym
            @    Hypernym
            @i    Instance Hypernym
             ~    Hyponym
             ~i    Instance Hyponym
            #m    Member holonym
            #s    Substance holonym
            #p    Part holonym
            %m    Member meronym
            %s    Substance meronym
            %p    Part meronym
            =    Attribute
            +    Derivationally related form        
            ;c    Domain of synset - TOPIC
            -c    Member of this domain - TOPIC
            ;r    Domain of synset - REGION
            -r    Member of this domain - REGION
            ;u    Domain of synset - USAGE
            -u    Member of this domain - USAGE
        
        The pointer_symbol s for verbs are:
        
            !    Antonym
            @    Hypernym
             ~    Hyponym
            *    Entailment
            >    Cause
            ^    Also see
            $    Verb Group
            +    Derivationally related form        
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE
        
        The pointer_symbol s for adjectives are:
        
            !    Antonym
            &    Similar to
            <    Participle of verb
            \    Pertainym (pertains to noun)
            =    Attribute
            ^    Also see
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE
        
        The pointer_symbol s for adverbs are:
        
            !    Antonym
            \    Derived from adjective
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE 
        """
        node1 = self.graph_db.find_one(self.nodelabel,
                                       property_key="synset_id",
                                       property_value=synset1)
        node2 = self.graph_db.find_one(self.nodelabel,
                                       property_key="synset_id",
                                       property_value=synset2)
        if (node1 is not None) and (node2 is not None):
            rel = Relationship(node1,
                               self.reltype,
                               node2,
                               pointer_symbol=ptype)
            return rel
        else:
            raise Exception(
                "Could not create Wordnet relation (%s) - [%s] -> (%s)" %
                (synset1, ptype, synset2))

    def insert_bulk(self, objs):
        if len(objs) > 0:
            self.graph_db.create(*objs)
        print(record['FULL Name'])
'''
with open('../data/tmp.json') as journal_article_file:
    journal_structure = json.load(journal_article_file)

j_list = []
a_list = []
#print type(journal_structure["ACM"])
print(journal_structure["ACM"]["JDIQ"]["Volume5"]["Issue4"]["articles"]["Article No.: 13"]["references"][1])

#print(journal_structure["ACM"]["JDIQ"])

for journal_key, journal_value in journal_structure["ACM"].items():
    j_list.append(journal_key)
    print(journal_key)
    journal_to_be_added = graph.merge_one("Journal", "name", journal_key)
    for volume_key, volume_value in journal_value.items():
        print('\t' + volume_key)
        for issue_key, issue_value in volume_value.items():
            #print('\t\t' + issue_key),
            #print("key")
            for issue_attributes_key, issue_attributes_value in issue_value.items():
                #print(issue_attributes_key),
                #print("attributes")

                if issue_attributes_key in "articles":
                    for article_key, article_value in issue_attributes_value.items():
                        title    = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["title"]
                        abstract = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["abstract"]
                        authors  = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["authors"]
                        doi      = journal_structure["ACM"][journal_key][volume_key][issue_key][issue_attributes_key][article_key]["doi"]
Beispiel #14
0
#/usr/bin/python

# a simple neo4j script to create two nodes and an edge

# setup user name & password
import myconfig
print myconfig.getUser()

from py2neo import Graph, Path, Node, Relationship
graph = Graph("http://"+myconfig.getUser()+":"+myconfig.getPass()+"@localhost:7474/db/data")

# graph.merge_one only creates this node if it has to
subject = graph.merge_one("instance","uniqueid", "E58")
object = graph.merge_one("instance","uniqueid", "billy2")

# only create unique relationships, don't need to keep re-expressing the same thing
# this fails for some reason
#rel = graph.create_unique(Relationship(subject,"bredBy",object))
rel = Relationship(subject,"bredBy",object)

graph.create(rel)

    graph.schema.drop_uniqueness_constraint("Fund", "name")
    graph.schema.drop_uniqueness_constraint("Institute", "name")
    graph.schema.drop_uniqueness_constraint("Person", "name")

    graph.schema.create_uniqueness_constraint("Company", "name")
    graph.schema.create_uniqueness_constraint("Fund", "name")
    graph.schema.create_uniqueness_constraint("Institute", "name")
    graph.schema.create_uniqueness_constraint("Person", "name")

    for row in bsm.rows[1:]:
      from_type, from_name, edge_type, edge_name, to_type, to_name, netlog = [cell.value for cell in row]
      if netlog is None:
        from_type = "grey"
        to_type = "grey"
      print(from_type, from_name, edge_type, to_type, to_name)
      from_node = graph.merge_one(from_type.strip(), "name", from_name.strip())
      to_node = graph.merge_one(to_type.strip(), "name", to_name.strip())
      from_to = Relationship(from_node, edge_type, to_node)
      graph.create_unique(from_to)

    # get nodes with degree
    nodes = []
    for label in graph.node_labels:
      for p in graph.find(label):
        node = {"id": p.ref.split("/")[-1],
                "label": p["name"], 
                "title": p["name"],
                "value": p.degree,
                "group": label}
        nodes.append(node)
    with open("report/nodesnetlog.js", "w") as f:
Beispiel #16
0
class GraphDatabase():
    """Neo4j-backed document/feature graph used for co-occurrence analysis.

    BUG FIX: every method after __init__ was indented one level too far,
    which made them local functions inside __init__ that were discarded
    on return; they are now proper instance methods.
    """

    def __init__(self):
        try:
            self.graph = Graph(
                'http://*****:*****@localhost:7474/db/data')
        except Exception:
            # Connection failed (e.g. the Neo4j server is not running);
            # report and re-raise. The original called
            # self.graph.delete_all() here, which could only fail again
            # because self.graph was never assigned.
            print 'ERROR: Initialize Neo4j browser'
            raise

    def createDocumentNode(self, index, label):
        """Merge the 'Doc <index>' node and initialise id/label/weights."""
        docNode = self.graph.merge_one('Document', 'name',
                                       'Doc ' + str(index))
        self.updateNode(docNode, {
            'id': index,
            'label': label,
            'in-weight': 0,
            'out-weight': 0
        })
        return docNode

    def createFeatureNode(self, index, word):
        """Create a Feature node for `word` with zeroed weights."""
        wordNode = Node('Feature', word=word)
        self.graph.create(wordNode)
        self.updateNode(wordNode, {
            'in-weight': 0,
            'out-weight': 0,
            'id': index
        })
        return wordNode

    def getFeatureNode(self, word):
        """Return the first Feature node whose 'word' property matches."""
        return list(
            self.graph.find('Feature',
                            property_key='word',
                            property_value=word))[0]

    def createWeightedRelation(self, node1, node2, relation):
        """Create the relation with weight 1, or bump the weights of all
        existing matches."""
        match = self.graph.match(start_node=node1,
                                 rel_type=relation,
                                 end_node=node2)
        numberOfRelations = sum(1 for x in match)
        if numberOfRelations >= 1:
            # the generator above is exhausted by the count, so query again
            match = self.graph.match(start_node=node1,
                                     rel_type=relation,
                                     end_node=node2)
            for relationship in match:
                self.increaseWeight(relationship)
                self.increaseWeight(node1, 'out-weight')
                self.increaseWeight(node2, 'in-weight')
        else:
            newRelation = Relationship(node1, relation, node2, weight=1)
            self.graph.create(newRelation)
            self.increaseWeight(node1, 'out-weight')
            self.increaseWeight(node2, 'in-weight')

    def increaseWeight(self, entity, weight='weight'):
        """Increment a weight property on a node or relationship and push."""
        entity[weight] = entity[weight] + 1
        self.graph.push(entity)

    def updateNode(self, node, propertyDict):
        """Update several node properties at once and push."""
        node.properties.update(propertyDict)
        self.graph.push(node)

    def normalizeRelationships(self, nodes, relation):
        """Store weight / in-weight as 'norm_weight' on each incoming
        relation of every node."""
        for node in nodes:
            for rel in node.match_incoming(relation):
                rel['norm_weight'] = rel['weight'] / node['in-weight']
                self.graph.push(rel)

    def getNodes(self, feature):
        """Return all nodes carrying the given label."""
        recordList = self.graph.cypher.execute(
            'MATCH (node:%s) RETURN node' % feature)
        return [record.node for record in recordList]

    def getMatrix(self,
                  nodesX,
                  nodesY=None,
                  relation='followed_by',
                  propertyType='norm_weight'):
        """Build a weight matrix from outgoing `relation` edges, indexed
        by the nodes' 'id' properties.

        BUG FIX: the original returned inside the loop over nodesX, so
        only the first node's row was ever filled.
        """
        if nodesY == None:
            nodesY = nodesX
        matrix = np.zeros([len(nodesX), len(nodesY)])
        for node in nodesX:
            rowIndex = node['id']
            for outRelation in node.match_outgoing(relation):
                colIndex = outRelation.end_node['id']
                weight = outRelation[propertyType]
                matrix[rowIndex, colIndex] = weight
        return matrix

    def cypherContextSim(self):
        """Run the CONTEXT_SIM cypher statement in a single transaction."""
        tx = self.graph.cypher.begin()
        tx.append(CONTEXT_SIM)
        tx.commit()
Beispiel #17
0
class Neo4jModel:
    """Persistence layer mapping court cases and their lookup entities
    (region, court, chairman, decision/judgement types) onto Neo4j."""

    def __init__(self):
        self.graph = Graph()

    def create(self):
        """Declare uniqueness constraints for every node label used here."""
        schema = self.graph.schema
        schema.create_uniqueness_constraint("Region", "name")
        schema.create_uniqueness_constraint("Court", "name")
        schema.create_uniqueness_constraint("Court_Decision_Type", "name")
        schema.create_uniqueness_constraint("Court_Judgement_Type", "name")
        schema.create_uniqueness_constraint("Case", "id")
        schema.create_uniqueness_constraint("Chairman", "name")

    def region(self, region_name):
        """Merge, push and return the Region node with the given name."""
        region_node = self.graph.merge_one("Region", "name", region_name)
        region_node.push()
        return region_node

    def court(self, court_name, region_name):
        """Merge the Court node and link it to its Region."""
        court_node = self.graph.merge_one("Court", "name", court_name)
        court_node.push()
        self.graph.create_unique(
            Relationship(court_node, "SITUATED_IN", self.region(region_name)))
        return court_node

    def chairman(self, chairman_name):
        """Merge, push and return the Chairman node."""
        chairman_node = self.graph.merge_one("Chairman", "name", chairman_name)
        chairman_node.push()
        return chairman_node

    def decision_type(self, decision_type_name):
        """Merge, push and return the Court_Decision_Type node."""
        type_node = self.graph.merge_one("Court_Decision_Type", "name",
                                         decision_type_name)
        type_node.push()
        return type_node

    def judgement_type(self, judgement_type_name):
        """Merge, push and return the Court_Judgement_Type node."""
        type_node = self.graph.merge_one("Court_Judgement_Type", "name",
                                         judgement_type_name)
        type_node.push()
        return type_node

    def case(self, court_case, region_name):
        """Merge a Case node, set its attributes and wire it to its
        court, chairman and type nodes with unique relationships."""
        case_node = self.graph.merge_one("Case", "id", court_case.decision_number)
        case_node["reg_date"] = __timestamp__(court_case.reg_date)
        case_node["law_date"] = __timestamp__(court_case.law_date)
        case_node["link"] = court_case.link
        case_node["text"] = court_case.text
        case_node["case_number"] = court_case.case_number
        self.graph.create_unique(Relationship(
            case_node, "RULED_BY",
            self.court(court_case.court_name, region_name)))
        self.graph.create_unique(Relationship(
            case_node, "CARRIED_BY", self.chairman(court_case.chairman)))
        self.graph.create_unique(Relationship(
            case_node, "OF_JUDGEMENT_TYPE",
            self.judgement_type(court_case.vr_type)))
        self.graph.create_unique(Relationship(
            case_node, "OF_DECISION_TYPE",
            self.decision_type(court_case.cs_type)))
        case_node.push()
        return case_node

    def change_date(self):
        """Normalise law_date to a timestamp on a handful of Case nodes."""
        query = "MATCH (n:Case) WHERE NOT (n.law_date='') RETURN n LIMIT 5"
        id_list = []
        for record in self.graph.cypher.execute(query):
            id_list.append(record[0].__str__()[2:].split(':')[0])  # getting an id
        for _id in id_list:
            case_node = self.graph.node(str(_id))
            case_node['law_date'] = __timestamp__(case_node['law_date'])
            case_node.push()
            print(case_node)
Beispiel #18
0
def moveContentModel():
    """Fetch the content list from the EkStep API and mirror it into Neo4j.

    Each content item becomes a ``Content`` node merged by identifier with
    its scalar attributes copied over; each of its concepts becomes a
    ``Concept`` node linked back via a COVERED_IN relationship.
    """
    listURL = "https://api.ekstep.in/learning/v2/content/list"

    payload = "{\n  \"request\": { \n      \"search\": {\n          \"fields\": [\"name\", \"contentType\"],\n          \"status\":[\"Live\", \"Draft\", \"Retired\"],\n          \"contentType\":[\"Game\", \"Worksheet\", \"Story\"],\n          \"limit\":2000\n          \n      }\n  }\n}"
    headers = {
        'content-type': "application/json",
        'user-id': "mahesh",
        'cache-control': "no-cache",
        'postman-token': "cec63279-346d-a452-4b13-e3cc0a0c2e4d"
        }

    resp = requests.request("POST", listURL, data=payload, headers=headers).json()
    # neo4j graph connector
    graph = Graph()

    # no of content
    # (dict.has_key() was removed in Python 3; `in` behaves identically)
    contentList = resp["result"]["content"]
    for contentDict in contentList:
        # skip content without an identifier -- nothing to merge on
        if 'identifier' not in contentDict:
            continue

        identifier = contentDict['identifier']

        # create a node for this Content
        node = graph.merge_one("Content", "id", identifier)

        # copy the simple scalar attributes when present
        if 'languageCode' in contentDict:
            node.properties['languageCode'] = contentDict['languageCode']
            node.push()

        if 'createdOn' in contentDict:
            node.properties['createdOn'] = contentDict['createdOn']
            node.push()

        if 'ageGroup' in contentDict:
            node.properties['ageGroup'] = contentDict['ageGroup'][0]
            node.push()

        if 'gradeLevel' in contentDict:
            node.properties['gradeLevel'] = contentDict['gradeLevel'][0]
            node.push()

        if 'owner' in contentDict:
            node.properties['owner'] = contentDict['owner']
            node.push()

        # BUG FIX: the concepts loop used to sit OUTSIDE this if-block, so
        # a content item without 'concepts' either crashed with NameError
        # (first item) or silently re-linked the previous item's concepts.
        if 'concepts' in contentDict:
            # these form "relationships" in the graph
            for concept in contentDict['concepts']:
                if 'identifier' in concept:
                    node2 = graph.merge_one("Concept", "id", concept['identifier'])
                    graph.create(Relationship(node2, "COVERED_IN", node))
Beispiel #19
0
# NOTE(review): fragment of a live-coding demo; `marnee_properties` and
# `graph` are defined in an earlier (not shown) part of the file.
marnee_properties["age"] = 100
# Node.cast builds a Node from a plain dict; graph.create always inserts,
# hence the duplicates complained about below.
marnee_with_dict_node = Node.cast("Person", marnee_properties)
print marnee_with_dict_node
graph.create(marnee_with_dict_node)
# look at the graph
# BUT I END UP WITH GHOLA, I mean DUPLICATES
# How can we do this without duplication????

# Show merge in the browser

# PY2NEO
# graph.merge() -- returns a generator (generators are cool)
# graph.merge_one() -- returns one node
# show documentation

marnee_merge_node = graph.merge_one(label="Person", property_key="name", property_value="Marnee")
print marnee_merge_node

# but I have more than one property on this node.  How do I get them in there
# merge returns a node, or set of nodes, and we can do things to a Node like Node.Push
# Node.properties
# Node.push()

# copy every demo property onto the merged node, then push once
for key, value in marnee_properties.iteritems():  # so pythonic
            marnee_merge_node[key] = value
marnee_merge_node.push()
#look at the graph.  did we create a third Marnee?  No we only have two.

#How many Marnees do you know?
# comment out marnee dict and clear db to start over
Beispiel #20
0
import unicodedata
from py2neo import Graph, Node, Relationship

g = Graph()
g.delete_all()

starts = 'Stockholm', 'Edinburgh'

for start in starts:
	print "\nStarting " + start
	start_node = g.merge_one('Airport', property_key='name', property_value=start)
	for l in open(start + '.csv'):
		items = l.split(',')
		airline = unicodedata.normalize('NFKD', unicode(items[0], encoding='utf-8')).encode('ascii', 'ignore')
		print "Airline is " + airline
		for airport in items[1:]:
			airport = unicodedata.normalize('NFKD', unicode(airport, encoding='utf-8')).encode('ascii', 'ignore').strip().split('-')[0]
			print "Endpoint is " + airport
			end_node = g.merge_one("Airport", property_key='name', property_value=airport)
			g.create(Relationship(start_node, "FLIES_TO", end_node, airline=airline))
# Second take of the merge demo -- the Node.cast/create path is commented out
# so only merge_one runs (no duplicates).
# NOTE(review): `graph` and `marnee_properties` are defined earlier in the
# original script and are not visible in this excerpt.
marnee_properties["age"] = 100
# marnee_with_dict_node = Node.cast("Person", marnee_properties)
# print marnee_with_dict_node
# graph.create(marnee_with_dict_node)
# # look at the graph
# # BUT I END UP WITH GHOLA, I mean DUPLICATES
# # How can we do this without duplication????
#
# # Show merge in the browser
#
# # PY2NEO
# # graph.merge() -- returns a generator (generators are cool)
# # graph.merge_one() -- returns one node
# # show documentation
#
marnee_merge_node = graph.merge_one(label="Person", property_key="name", property_value="Marnee")
print marnee_merge_node
#
# # but I have more than one property on this node.  How do I get them in there
# # merge returns a node, or set of nodes, and we can do things to a Node like Node.Push
# # Node.properties
# # Node.push()
#
# Copy the remaining properties onto the merged node, then push to the server.
for key, value in marnee_properties.iteritems():  # so pythonic
            marnee_merge_node[key] = value
marnee_merge_node.push()
# #look at the graph.  did we create a third Marnee?  No we only have two.
#
# #How many Marnees do you know?
# # comment out marnee dict and clear db to start over
#
Beispiel #22
0
'''
Import the second level in the subjects hierarchy. By default, this will be from the level1list.json file.
'''
import json
import sys

from py2neo import Graph
graph = Graph()
from py2neo import Node, Relationship

level1_f = sys.argv[1]

subjects = json.loads(open(level1_f).read())

for d in subjects:
    parent = graph.merge_one("Subject", "id", str(d["parent0"]))
    print str(d["id"]) + ":" + d["name"] + "--" + parent["name"]
    n = Node("Subject", id=d["id"], name=d["name"], level=1)
    r = Relationship(n, "TYPE_OF", parent)
    graph.create(r)


Beispiel #23
0
def update_show_info_old():
    """Refresh every Show node with TVRage and OMDb metadata.

    Streams all Show nodes from Neo4j, re-fetches the full show record
    from TVRage (XML) and OMDb (JSON), then rebuilds the Country, Genre,
    Season and Episode nodes plus their relationships.  Legacy Python 2 /
    py2neo 2.x batch job; makes one network request per show and per
    episode, so it is slow and retry-heavy by design.
    """
    print 'updating show info'
    authenticate("localhost:7474", "neo4j", "1234")
    # NOTE(review): GRAPH_CONNECTION_STRNIG is a module-level constant
    # (typo included) defined outside this excerpt.
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    # Stream (internal node id, show id) for every Show node.
    results = graph.cypher.stream("match (s:Show) return id(s) as eid,s.id")
    # Resume point: shows with id below this were handled by a previous run.
    start_id = 764
    for record in results:
        if int(record['s.id']) < start_id:
            continue

        node_show = graph.node(record['eid'])

        result_dict = {}

        # Retry loop -- `success` actually means "still trying".
        success = True
        while success:
            try:
                show_info_e_list = requests.get(
                    'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
                result_dict = xmltodict.parse(show_info_e_list.text)

                omdb_show_info = requests.get(
                    'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
                dict_omdb_show_info = json.loads(omdb_show_info.text)
                # OMDb signals hits via a string flag, not HTTP status.
                if dict_omdb_show_info['Response'] == 'True':
                    for key, value in dict_omdb_show_info.iteritems():
                        node_show[key] = value
                success = False
            except ValueError as e:
                # NOTE(review): `continue` leaves `success` True, so a
                # persistently malformed payload retries forever -- confirm
                # this is intended.
                logger.exception("Value Error")
                continue
            except Exception as e:
                logger.exception("Some network issue, will try again")
                success = True

        print str(node_show['name'])
        # info

        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()

        #Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')

        node_country = graph.merge_one("Country", 'country', from_country)
        node_country.push()

        show_from_country = Relationship(node_show, "from", node_country)
        graph.create(show_from_country)


        #Genres
        # xmltodict yields a single dict for one genre, a list for many --
        # normalise to a list first.
        if result_dict['Show'].get('genres', None) is not None:
            genre_list = []
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])

            for genre in genre_list:
                node_genre = graph.merge_one("Genre", 'name', genre)
                node_genre.push()

                show_of_genre = Relationship(node_show, "of genre", node_genre)
                graph.create(show_of_genre)

        """try:
            print node_show['started']
            a = node_show['started'].split("/")
            if int(a[len(a)-1]) < 2000:
                continue
        except Exception:
            continue
        """

        #Seasons
        # Same single-vs-list normalisation for seasons and episodes below.
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            continue
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])

        for season in season_list:
            node_season = Node.cast('Season', {'no': season['@no']})
            graph.create(node_season)

            show_season = Relationship(node_show, "has", node_season)
            graph.create(show_season)

            #Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])
            count = 1
            for episode in episode_list:
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                graph.create(node_episode)

                # Per-episode OMDb enrichment with the same retry pattern.
                success = True
                while success:
                    try:
                        omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                         .format(node_show['name'],
                                                                 node_season['no'],
                                                                 node_episode['epnum']))
                        dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                        if dict_omdb_episode_info['Response'] == 'True':
                            for key, value in dict_omdb_episode_info.iteritems():
                                node_episode[key] = value
                        node_episode.push()
                        success = False
                    except ValueError as e:
                        logger.exception("Value error")
                        continue
                    except Exception as e:
                        logger.exception("network issue: wil try again")
                        success = True

                # NOTE(review): `show_season` is a Relationship, not a node --
                # this probably should link `node_season` to the episode.
                # Confirm against py2neo 2.x semantics before changing.
                show_episode = Relationship(show_season, "has", node_episode)
                graph.create(show_episode)
                count = count + 1

    print 'end updating show info'
Beispiel #24
0
from py2neo import Graph, Node, Relationship
import json

f = open('tt2.json', 'r')
jj = json.loads(f.read())
f.close()


graph = Graph('http://*****:*****@localhost:7474/db/data')

for post in jj:
    poster = graph.merge_one("User", "id", post['poster'])
    neoPost = graph.merge_one("Post", "id", post['id'])
    posted = graph.create_unique(Relationship(poster, "POSTED", neoPost))
    print "(%s)-[:POSTED]->(%s)" % (post['poster'], post['id'])

    if post.get('reblogged_from'):
        reblogger = graph.merge_one("User", "id", post['reblogged_from'])
        reblogged_post = graph.merge_one("Post", "id", post['reblog_post_id'])
        graph.create_unique(Relationship(reblogger, "POSTED", reblogged_post))
        graph.create_unique(Relationship(neoPost, "REBLOG_OF", reblogged_post))
        print "(%s)-[:POSTED]->(%s)" % (post['reblogged_from'], post['reblog_post_id'])

    if post.get('original_poster'):
        original_poster = graph.merge_one("User", "id", post['original_poster'])
        original_post = graph.merge_one("Post", "id", post['original_post_id'])
        graph.create_unique(Relationship(original_poster, "POSTED", original_post))
        graph.create_unique(Relationship(neoPost, "ORIGINATES_FROM", original_post))
        print "(%s)-[:POSTED]->(%s)" % (post['original_poster'], post['original_post_id'])
Beispiel #25
0
# Authenticate and create graph
authenticate("localhost:7474", "neo4j", "somak");
graph = Graph();
# Legacy (pre-2.0 style) node index used for fast name lookups.
idx = graph.legacy.get_or_create_index(neo4j.Node, "Entities")
# TODO: everytime database is reset, create the constraint
#graph.schema.create_uniqueness_constraint("Entity", "name")
words_dict={};

# Usage: python conceptnetneo4j.py <seedsfile> (one word per line).
if len(sys.argv) < 2:
	print "python conceptnetneo4j.py <seedsfile>";
	sys.exit();
# First pass: create one Entity node per seed word (as '/c/en/<word>')
# and register it in the legacy index.
with open(sys.argv[1], "r") as f:
	for line in f:
		word=line.strip();
		word = str('/c/en/')+word;
		n = graph.merge_one("Entity", "name", word);
		#n['completed'] = 0;
		idx.add("name",encode(n["name"]),n);

# Second pass: spawn one crawler thread per seed word.
# NOTE(review): `AssertionFinder`, `recursivelyAddNodesAndEdges` and
# `encode` are defined elsewhere in the original script.
with open(sys.argv[1], "r") as f:
	i=0;
	threads = [];
	for line in f:
		word=line.strip();
		word = str('/c/en/')+word;
		FINDER = AssertionFinder();
		t = threading.Thread(target=recursivelyAddNodesAndEdges, name=str(i), args=(word,idx,graph,FINDER,0));
		t.start();
		threads.append(t);
		i = i+1;
		# NOTE(review): snippet is truncated here -- the body of this `if`
		# (presumably joining every 10th batch of threads) is missing.
		if i%10 == 0:
# Read the author data structure file for ACM Scraping.
with open('data/acm_author.json') as author_file:
    author_structure = json.load(author_file)

# Create a node for every author of type "Author" storing the first, middle,
# last and full name.  The unique ACM profile link is the merge key.
# NOTE(review): `graph` is created earlier in the original script.
for key, value in author_structure.items():
    for record in value:
        link = str(record['link'])
        # print(link)
        first_name = record['FName']
        mid_name = record['MName']
        last_name = record['LName']
        full_name = record['FULL Name']

        author_to_be_added = graph.merge_one("Author", "link", link)
        author_to_be_added['full_name'] = full_name
        # FIX: the property key was misspelled 'fist_name'.
        author_to_be_added['first_name'] = first_name
        author_to_be_added['middle_name'] = mid_name
        author_to_be_added['last_name'] = last_name
        author_to_be_added.push()
        print(record['FULL Name'] + "\t")

print("\n")
# Read the journal and article data structure file for ACM Scraping
with open('data/tmp.json') as journal_article_file:
    acm_structure = json.load(journal_article_file)

# Accumulators for journal/article nodes built later in the original script.
j_list = []
a_list = []
Beispiel #27
0
class StuffNeo4j():
    """py2neo 2.x helper for loading WordNet-style data into Neo4j.

    Nodes are labelled ``nodelabel`` and relationships typed ``reltype``;
    synsets are matched on their ``synset_id`` property.
    """

    def __init__(self, nodelabel, reltype):
        # Graph handle is opened lazily in connect().
        self.graph_db = None
        self.nodelabel = nodelabel
        self.reltype = reltype
        
    def connect(self, uri, usr="******", pwd="neo4j"):
        """Authenticate against the Neo4j REST endpoint and open the graph."""
        if not uri.endswith('/'):
            uri += '/'
        authenticate(uri, usr, pwd)
        self.graph_db = Graph(uri + "db/data")
        
    def create_indexes(self):
        """Create the name/synset_id/pointer_symbol indexes, ignoring duplicates."""
        #If index is already created py2neo throws exception.
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(name)" % 
                self.nodelabel)
        except:
            pass
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(synset_id)" % 
                self.nodelabel)
        except:
            pass
        try:
            self.graph_db.cypher.execute("CREATE INDEX ON :%s(pointer_symbol)" %
                self.reltype)
        except:
            pass
    
    def create_node(self, nodetype, **kwargs):
        """Build an unsaved Node (caller persists it, e.g. via insert_bulk)."""
        return Node(nodetype, **kwargs)
        
    def merge_node(self, nodetype, uniq_key, uniq_val, **kwargs):
        """Merge a node on (uniq_key, uniq_val), set extra properties, push."""
        n = self.graph_db.merge_one(nodetype, uniq_key, uniq_val)
        for k in kwargs:        
            n.properties[k] = kwargs[k]
        n.push()
        return n
   
    def insert_rel(self, reltype, node1, node2, **kwargs):
        """Create a (node1)-[reltype]->(node2) relationship (duplicates allowed)."""
        if node1 is not None and node2 is not None: 
            rel = Relationship(node1, reltype, node2, **kwargs)
            self.graph_db.create(rel)
        else:
            print "Could not insert relation (%s) - [%s] -> (%s)" % (           
                node1, reltype, node2)
            
    def merge_rel(self, reltype, node1, node2, **kwargs):
        """Like insert_rel but with create_unique (no duplicate edges)."""
        if node1 is not None and node2 is not None: 
            rel = Relationship(node1, reltype, node2, **kwargs)
            return self.graph_db.create_unique(rel)
        else:
            print "Could not merge relation (%s) - [%s] -> (%s)" % (           
                node1, reltype, node2)
    
    def create_wordnet_rel(self, synset1, synset2, ptype):
        """
        Pointer symbols
        http://wordnet.princeton.edu/wordnet/man/wninput.5WN.html
        
         The pointer_symbol s for nouns are:
        
            !    Antonym
            @    Hypernym
            @i    Instance Hypernym
             ~    Hyponym
             ~i    Instance Hyponym
            #m    Member holonym
            #s    Substance holonym
            #p    Part holonym
            %m    Member meronym
            %s    Substance meronym
            %p    Part meronym
            =    Attribute
            +    Derivationally related form        
            ;c    Domain of synset - TOPIC
            -c    Member of this domain - TOPIC
            ;r    Domain of synset - REGION
            -r    Member of this domain - REGION
            ;u    Domain of synset - USAGE
            -u    Member of this domain - USAGE
        
        The pointer_symbol s for verbs are:
        
            !    Antonym
            @    Hypernym
             ~    Hyponym
            *    Entailment
            >    Cause
            ^    Also see
            $    Verb Group
            +    Derivationally related form        
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE
        
        The pointer_symbol s for adjectives are:
        
            !    Antonym
            &    Similar to
            <    Participle of verb
            \    Pertainym (pertains to noun)
            =    Attribute
            ^    Also see
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE
        
        The pointer_symbol s for adverbs are:
        
            !    Antonym
            \    Derived from adjective
            ;c    Domain of synset - TOPIC
            ;r    Domain of synset - REGION
            ;u    Domain of synset - USAGE 
        """
        # Look both synsets up by synset_id; the relationship is only built
        # (not persisted) here -- callers batch it via insert_bulk().
        node1 = self.graph_db.find_one(self.nodelabel, 
                                       property_key="synset_id",
                                       property_value=synset1)
        node2 = self.graph_db.find_one(self.nodelabel, 
                                       property_key="synset_id",
                                       property_value=synset2)
        if (node1 is not None) and (node2 is not None):
            rel = Relationship(node1, self.reltype, node2, pointer_symbol=ptype)
            return rel
        else:
            raise Exception("Could not create Wordnet relation (%s) - [%s] -> (%s)" % (           
                synset1, ptype, synset2))
        
    def insert_bulk(self, objs):
        """Persist a batch of nodes/relationships in one call (no-op if empty)."""
        if len(objs) > 0:
            self.graph_db.create(*objs)
class Neo4jModel:
    """Persistence helpers mapping court-case records onto a Neo4j graph.

    Each lookup method merges its node by a unique key, pushes it, and
    returns it so callers can wire relationships immediately.
    """

    def __init__(self):
        # Default local Neo4j connection.
        self.graph = Graph()

    def create(self):
        """One-time schema bootstrap: a uniqueness constraint per label/key."""
        constraints = (
            ("Region", "name"),
            ("Court", "name"),
            ("Court_Decision_Type", "name"),
            ("Court_Judgement_Type", "name"),
            ("Case", "id"),
            ("Chairman", "name"),
        )
        for label, key in constraints:
            self.graph.schema.create_uniqueness_constraint(label, key)

    def region(self, region_name):
        """Merge and return the Region node for *region_name*."""
        node = self.graph.merge_one("Region", "name", region_name)
        node.push()
        return node

    def court(self, court_name, region_name):
        """Merge the Court node and tie it to its Region."""
        node = self.graph.merge_one("Court", "name", court_name)
        node.push()
        self.graph.create_unique(
            Relationship(node, "SITUATED_IN", self.region(region_name)))
        return node

    def chairman(self, chairman_name):
        """Merge and return the Chairman node."""
        node = self.graph.merge_one("Chairman", "name", chairman_name)
        node.push()
        return node

    def decision_type(self, decision_type_name):
        """Merge and return the Court_Decision_Type node."""
        node = self.graph.merge_one("Court_Decision_Type", "name", decision_type_name)
        node.push()
        return node

    def judgement_type(self, judgement_type_name):
        """Merge and return the Court_Judgement_Type node."""
        node = self.graph.merge_one("Court_Judgement_Type", "name", judgement_type_name)
        node.push()
        return node

    def case(self, court_case, region_name):
        """Merge a Case node and link it to its court, chairman and types."""
        node = self.graph.merge_one("Case", "id", court_case.decision_number)
        node["reg_date"] = __timestamp__(court_case.reg_date)
        node["law_date"] = __timestamp__(court_case.law_date)
        node["link"] = court_case.link
        node["text"] = court_case.text
        node["case_number"] = court_case.case_number
        self.graph.create_unique(Relationship(node, "RULED_BY", self.court(court_case.court_name, region_name)))
        self.graph.create_unique(Relationship(node, "CARRIED_BY", self.chairman(court_case.chairman)))
        self.graph.create_unique(Relationship(node, "OF_JUDGEMENT_TYPE", self.judgement_type(court_case.vr_type)))
        self.graph.create_unique(Relationship(node, "OF_DECISION_TYPE", self.decision_type(court_case.cs_type)))
        node.push()
        return node

    def change_date(self):
        """Convert law_date on a handful of Case nodes to a timestamp."""
        query = "MATCH (n:Case) WHERE NOT (n.law_date='') RETURN n LIMIT 5"
        # Extract the node id from the textual form of each result row.
        case_ids = [row[0].__str__()[2:].split(':')[0]
                    for row in self.graph.cypher.execute(query)]
        for case_id in case_ids:
            node = self.graph.node(str(case_id))
            node['law_date'] = __timestamp__(node['law_date'])
            node.push()
            print(node)
Beispiel #29
0
from py2neo import Graph, Path, Node, Relationship

# Mirror the MongoDB "muziekcentrum" collection into a GrapheneDB instance.
# NOTE: MongoClient is imported earlier in the original script.
db = MongoClient('mongodb://<user>:<pass>@ds<id>.mongolab.com:<port>/<db>')
collection = db["muziekcentrum"]["muziekcentrum"]

graph = Graph("http://<user>:<apikey>@<db>.sb02.stations.graphenedb.com:<port>/db/data/")

# Wipe every node and relationship before re-importing.
graph.cypher.execute("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE r,n")

# Recreate the per-label uniqueness constraints from scratch.
for constrained_label in ("Album", "Uitvoerder", "Label"):
  graph.schema.drop_uniqueness_constraint(constrained_label, "name")
for constrained_label in ("Album", "Uitvoerder", "Label"):
  graph.schema.create_uniqueness_constraint(constrained_label, "name")


for doc in collection.find({"Type": "album"}):
  # Performers (Uitvoerders) MADE the album ...
  for performer in doc["Uitvoerder(s)"]:
    performer_node = graph.merge_one("Uitvoerder", "name", performer)
    album_node = graph.merge_one("Album", "name", doc["Titel"])
    graph.create_unique(Relationship(performer_node, "MADE", album_node))

  # ... and record labels RELEASED it.
  for record_label in doc["Label(s)"]:
    label_node = graph.merge_one("Label", "name", record_label)
    album_node = graph.merge_one("Album", "name", doc["Titel"])
    graph.create_unique(Relationship(label_node, "RELEASED", album_node))
Beispiel #30
0
#/usr/bin/python

# a simple neo4j script to create two nodes and an edge

# setup user name & password
import myconfig
print myconfig.getUser()

from py2neo import Graph, Path, Node, Relationship
graph = Graph("http://" + myconfig.getUser() + ":" + myconfig.getPass() +
              "@localhost:7474/db/data")

# graph.merge_one only creates this node if it has to
subject = graph.merge_one("instance", "uniqueid", "E58")
object = graph.merge_one("instance", "uniqueid", "billy2")

# only create unique relationships, don't need to keep re-expressing the same thing
# this fails for some reason
#rel = graph.create_unique(Relationship(subject,"bredBy",object))
rel = Relationship(subject, "bredBy", object)

graph.create(rel)
Beispiel #31
0
			# NOTE(review): this excerpt starts mid-loop -- `artwork`, `subjects`,
			# `mediums` and `graph` are defined earlier in the original script.
			# Walk up to three levels of the nested subject taxonomy, collecting
			# only the ids of the deepest (level-3) subjects.
			if "children" in artwork["subjects"]:
				for sl1 in artwork["subjects"]["children"]:
					#print sl1["name"]
					if "children" in sl1:
						for sl2 in sl1["children"]:
							#print "-" + sl2["name"]
							if "children" in sl2:
								for sl3 in sl2["children"]:
									#print "--" + sl3["name"]
									subjects.append(sl3["id"])
		
		# One Artwork node per record, linked to each collected Subject.
		node = Node("Artwork", id=artwork["id"], title=artwork["title"], acno=artwork["acno"])		
		graph.create(node)

		for s in subjects:
			subject = graph.merge_one("Subject", "id", s)
			r = Relationship(node, "FEATURES", subject)
			graph.create(r)

		# Split the free-text medium (e.g. "oil and ink on canvas") into
		# candidate medium names; digits are stripped out.
		# NOTE(review): `mediums` appears to accumulate across artworks rather
		# than reset per record -- confirm that is intended.
		if artwork["medium"]:
			for m in artwork["medium"].split(","):
				for n in m.split(" and "):
					for o in n.split(" on "):
						s = ''.join([i for i in o if not i.isdigit()])
						if s.strip().lower() not in mediums:
							mediums.append(s.strip().lower())
		
		for m in mediums:
			medium = graph.merge_one("Medium", "id", m)
			r = Relationship(node, "MADE_OF", medium)
			graph.create(r)
Beispiel #32
0
# NOTE(review): `auth`, `tweepy` setup and `graph` come from earlier in the
# original script; the snippet is truncated right after the Tweet merge.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

#Get specific hashtag  until the date we want
tweets = tweepy.Cursor(api.search,
                       q="#SuperBowl",
                       count=100,
                       until='2016-02-09',
                       include_entities=True).items()
for tweet in tweets:
    #find if exists for exploration...
    # Pre-check whether this user node already exists, so the "Exploration"
    # marker below is only set the first time the user is seen.
    mynode = list(
        graph.find('User',
                   property_key='Screen_Name',
                   property_value=tweet.user.screen_name.encode('utf8')))

    x = graph.merge_one("User", "Screen_Name",
                        tweet.user.screen_name.encode('utf8'))
    # Refresh the profile properties on every tweet from this user.
    x.properties.update({
        "Name": tweet.user.name,
        "Description": tweet.user.description.encode('utf8'),
        "Location": tweet.user.location,
        "Followers": tweet.user.followers_count,
        "Friends": tweet.user.friends_count,
        "Tweets": tweet.user.statuses_count,
        "Image": tweet.user.profile_image_url
    })
    if len(mynode) == 0:
        x.properties.update({"Exploration": ''})
    x.push()

    t = graph.merge_one("Tweet", "ID", tweet.id)
# Read the author data structure file for ACM Scraping.
with open('data/acm_author.json') as author_file:
    author_structure = json.load(author_file)

# Create a node for every author of type "Author" storing the first, middle,
# last and full name.  The unique ACM profile link is the merge key.
# NOTE(review): `graph` is created earlier in the original script.
for key, value in author_structure.items():
    for record in value:
        link = str(record['link'])
        # print(link)
        first_name = record['FName']
        mid_name = record['MName']
        last_name = record['LName']
        full_name = record['FULL Name']

        author_to_be_added = graph.merge_one("Author", "link", link)
        author_to_be_added['full_name'] = full_name
        # FIX: the property key was misspelled 'fist_name'.
        author_to_be_added['first_name'] = first_name
        author_to_be_added['middle_name'] = mid_name
        author_to_be_added['last_name'] = last_name
        author_to_be_added.push()
        print(record['FULL Name'] + "\t")

print("\n")
# Read the journal and article data structure file for ACM Scraping
with open('data/tmp.json') as journal_article_file:
    acm_structure = json.load(journal_article_file)

# Accumulators for journal/article nodes built later in the original script.
j_list = []
a_list = []
Beispiel #34
0
class SyntaxGraph():
    
    """
    The aim of this class is to find associated words to database syntax.
    A user will input a sentence, and these associations will be used to
    find the correct SQL statement to execute in the database.
    
    The relations between words are modelled as a graph. The nodes of the 
    graph are the words, and the edges (relationships) between nodes
    represent when a word means another word (e.g. is a synonym).
    
    The graph is "seeded" using a set of database syntax words, finding 
    synonyms/related words to these initial words using a call to a
    thesaurus API.
    
    The graph is then "grown" from the resulting synonyms using subsequent
    API calls, in a recursive fashion.
    
    When a user enters a sentence, this graph will be used to find 
    database syntax words which are within a certain "degree of 
    separation" from each word in the sentence, in an attempt to 
    start building a SQL query from this sentence.
    """
    
    def __init__(self, seed_words=None, seed_mappings=None):
        # NOTE(review): SQLTerms, DB_URI and API_KEY are module-level names
        # defined elsewhere in the original file.
        self.sql_terms = SQLTerms().sql_terms
        
        self.graph = Graph(DB_URI)
        self.tx = self.graph.cypher.begin()
        
        # Hand-curated aliases: each key is a SQL term, each value a list of
        # everyday words that should map directly onto it.
        self.seed_mappings = seed_mappings or {'where': ['filter', 'for', 'during'],
                                               'from': ['source', 'in'],
                                               'into': ['toward', 'within', 'inside'],
                                               'group':['by'],
                                               'and': ['with']}
        
        # Seed with every SQL term not already covered by a manual mapping...
        self.seed_words = seed_words or [x for x in self.sql_terms if x not in self.seed_mappings]
    
        # ...plus the mapped terms themselves.
        self.seed_words.extend([x for x in self.seed_mappings.iterkeys()])
        
        self.exclude_words = ['display']
        
    def seed(self, reset=False):
        """Populate the graph from the seed words and the manual mappings."""
        print 'Seeding graph'
        
        if reset:
            self.graph.delete_all()
        
        for word in self.seed_words:
            if not self.already_called(word):
                self.add_synonyms(word)
            if word in self.seed_mappings:
                # Manual aliases get bidirectional MEANS edges to the SQL term.
                print 'Mapping %s to %s' % ( ','.join(self.seed_mappings[word]), word )
                base = self.graph.merge_one('Word', 'name', word)
                synonyms = [self.graph.merge_one('Word', 'name', x) for x in self.seed_mappings[word]]
                [self.graph.create_unique(Relationship(base, 'MEANS', synonym)) for synonym in synonyms]
                [self.graph.create_unique(Relationship(synonym, 'MEANS', base)) for synonym in synonyms]
            
                
    def grow(self, levels=1):
        """Expand every not-yet-expanded word, recursing *levels* times."""
        print 'Levels left: %d' % levels
        
        # Words without the `called` flag have not been sent to the API yet.
        query = ''' MATCH (w:Word)
                    WHERE NOT HAS (w.called)
                    RETURN w.name
                '''
        
        results = self.graph.cypher.execute(query)     
        
        for word in results:
            self.add_synonyms(word['w.name'])
            
        if levels > 1:
            self.grow(levels-1)
                
            
    def already_called(self, word):
        """Return True if *word* was already expanded via the thesaurus API."""
        if len (self.graph.cypher.execute('''MATCH (w:Word)
                                             WHERE w.name = '%s'
                                               AND HAS (w.called)
                                             RETURN w.name 
                                          ''' % word) ) > 0:
            return True
        
    def update_set_called(self, word):
        """Mark *word* as expanded so grow()/seed() skip it next time."""
        word_node = self.graph.merge_one('Word', 'name', word)
        word_node.properties['called'] = 1
        word_node.push()
        
    def add_synonyms(self, word):
        """Fetch thesaurus data for *word* and add MEANS edges to the graph."""
        url = 'http://words.bighugelabs.com/api/2/%s/%s/json' % (API_KEY, word)
        print url
        
        response = requests.get(url)
        
        try:
            data = response.json()
        except JSONDecodeError:
            # No (valid) result for this word -- still mark it as handled.
            self.update_set_called(word)
            return
        
        # Only the 'verb' section of the response is used.
        if 'verb' in data:
            for key in data['verb']:
                # Synonyms: words are all interrelated (connected graph)
                if key == 'syn':
                    
                    synonyms = [word]
                    synonyms.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in synonyms]
                    [self.graph.create_unique(Relationship(i, 'MEANS', j)) for j in nodes for i in nodes if i!=j]
                    
                # Similar / user defined words: words are related both ways between root and related words (both direction)
                elif key in ('sim', 'usr'):
                    
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[i], 'MEANS', nodes[j])) for j in range(len(nodes)) for i in range(len(nodes)) if (i+j>0 and i*j==0)]
                    
                # Related words: words are related only from root to related word (one direction)
                elif key == 'rel':
                    
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[0], 'MEANS', nodes[i])) for i in range(1, len(nodes))]
            
        self.update_set_called(word)
        
    def replace_word(self, word, max_degree_separation=2):
        """Return the closest seed word within the separation limit, else None."""
        if word in self.seed_words or word in self.exclude_words: return word
        
        replacement_candidates = []
        
        for seed_word in self.seed_words:
        
            # Shortest MEANS-path between the input word and this seed word.
            query = '''MATCH p=shortestPath((w:Word{name:"%s"})-[*]-(n:Word{name:"%s"}))
                       RETURN length(p), n.name
                    ''' % (word, seed_word)
                    
            results = self.graph.cypher.execute(query)
            
            try:
                replacement_candidates.append(min([(row['length(p)'], row['n.name']) for row in results]))
            except ValueError:
                # min() of an empty sequence: no path to this seed word.
                pass

        if len(replacement_candidates) > 0:
            replacement = min(replacement_candidates)
            if replacement[0] <= max_degree_separation:
                return replacement[1]
        
    def replace_text(self, text):
        """Rewrite *text*, swapping each word for its nearest seed word."""
        pattern = re.compile('[\W_]+')
        cleaned = []
        replacements = []
        
        for word in text.split():
            cleaned_word = pattern.sub('', word)
            
            # NOTE(review): `cleaned` caches lookups but is never read back;
            # replace_word() is still called again on the next line.
            if cleaned_word not in [x[0] for x in cleaned]:
                cleaned.append([cleaned_word, self.replace_word(cleaned_word)])
            
            replacements.append(self.replace_word(cleaned_word) or cleaned_word)
        
        return ' '.join(replacements)
Beispiel #35
0
    ],
    'Elegant': ['Arctic Zen', 'Soho Sophistication', 'Istanbul Mosaic'],
    'Cozy': [
        'Ubud Terraces', 'Mekong Meander', 'Lunuganga Estate',
        'Dorchester Comfort'
    ],
    'Stylish':
    ['Santorini Calm', 'Arctic Zen', 'Jodhpur Blues', 'Soho Sophistication'],
    'Quirky': ['Warhol Burst', 'Banksy Quirk'],
    'Vibrant': [
        'Sindhoor Colonial', 'Istanbul Mosaic', 'Eiffel Chic', 'Red Earth',
        'Malnad Pure'
    ]
}

# Build the quiz graph: one Questions node per key, its Options, and the
# Profiles each option maps to.
# NOTE(review): `questions`, `q_order`, `opt_images`, `op_maps`, `graph` and
# `watch` are defined earlier in the original script (not in this excerpt).
keys = questions.keys()

for k in keys:
    q = Node("Questions", name=k)
    q.properties['order'] = q_order[k]
    for e in questions[k]:
        o = Node("Options", name=e)
        o.properties['image'] = opt_images[e]
        rel = Relationship(q, "HAS_OPTION", o)
        graph.create(rel)
        for z in op_maps[e]:
            z_node = graph.merge_one('Profiles', 'name', z)
            rel_op = Relationship(o, "LEADS_TO_PROFILE", z_node)
            graph.create(rel_op)
        # NOTE(review): enabling py2neo's HTTP debug watcher inside the loop
        # re-registers it per option -- likely meant to run once; confirm.
        watch("httpstream")
Beispiel #36
0
def import_api_data():
    """
    Import register data into the graph DB.

    Two passes:
      1. ``obtaj`` objects: each becomes a ``Demand`` node, linked from its
         ``Borjnuk`` nodes via ``obtajuetsa`` and from its ``Property``
         nodes via ``zakladena``.
      2. ``demand`` objects: each ``Demand`` is linked (``CONTAINS``) from
         its ``Debtor`` and ``Creditor``; the debtor is in turn linked
         from its ``Arbitration`` node.
    """

    graph = Graph()
    # graph.delete_all()
    # Uncomment on the first run! (labels match the ones used below)
    # graph.schema.create_uniqueness_constraint("Demand", "id")
    # graph.schema.create_uniqueness_constraint("Borjnuk", "id")
    # graph.schema.create_uniqueness_constraint("Property", "id")
    # graph.schema.create_uniqueness_constraint("Debtor", "id")
    # graph.schema.create_uniqueness_constraint("Creditor", "id")
    # graph.schema.create_uniqueness_constraint("Arbitration", "id")

    obtajenna = get_objects_art('obtaj')

    for api_obtaj in obtajenna:

        # merge_one matches an existing Demand by id or creates a new node.
        node_demand = graph.merge_one("Demand", "id", api_obtaj["id"])
        node_demand["reason_doc"] = api_obtaj["reason_doc"]
        node_demand["cost_size"] = api_obtaj["cost_size"]

        for api_author in api_obtaj["borjnuku"]:
            node_borjnuk = graph.merge_one("Borjnuk", "id", api_author["id"])
            node_borjnuk["name"] = api_author["name"]
            node_borjnuk["tel_number"] = api_author["tel_number"]
            node_borjnuk.push()
            graph.create_unique(Relationship(node_borjnuk, "obtajuetsa", node_demand))

        for api_property in api_obtaj["properties"]:
            node_property = graph.merge_one("Property", "id", api_property["id"])
            node_property["name"] = api_property["name_property"]
            node_property["ser_number"] = api_property["ser_number"]
            node_property.push()
            graph.create_unique(Relationship(node_property, "zakladena", node_demand))
        node_demand.push()

    demands = get_objects('demand')

    for api_demand in demands:

        node_demand = graph.merge_one("Demand", "id", api_demand["id"])
        node_demand["sum"] = api_demand["sum"]

        api_debtor = api_demand["Debtor"]
        node_debtor = graph.merge_one("Debtor", "id", api_debtor["id"])
        node_debtor["name"] = api_debtor["name"]

        # Arbitration data comes from a separate endpoint keyed by the debtor.
        api_arbitration = get_object('arbitration/' + str(api_debtor["arbitration_id"]))
        node_arbitration = graph.merge_one("Arbitration", "id", api_arbitration["id"])
        node_arbitration["name"] = api_arbitration["name"]
        node_arbitration.push()
        graph.create_unique(Relationship(node_arbitration, "CONTAINS", node_debtor))

        node_debtor.push()
        graph.create_unique(Relationship(node_debtor, "CONTAINS", node_demand))

        api_creditor = api_demand["Creditor"]
        node_creditor = graph.merge_one("Creditor", "id", api_creditor["id"])
        node_creditor["name"] = api_creditor["name"]
        node_creditor.push()
        graph.create_unique(Relationship(node_creditor, "CONTAINS", node_demand))

        node_demand.push()
Beispiel #37
0
'''
Import the third level in the subjects hierarchy. By default, this will be from the level2list.json file.

CLI parameters:
argv[1] - full path to json file, including filename
argv[2] - optional flag to print or not
'''
import json
import sys

from py2neo import Graph, Node, Relationship

graph = Graph()

level2_f = sys.argv[1]
# argv[2] is documented as optional: default to quiet when it is omitted
# (the original indexed sys.argv[2] unconditionally and crashed without it).
verbose = sys.argv[2] if len(sys.argv) > 2 else ''

# Close the file promptly instead of leaking the handle.
with open(level2_f) as f:
    subjects = json.load(f)

for d in subjects:
    # Parent subject is merged by id (matched if present, created otherwise).
    parent = graph.merge_one("Subject", "id", d["parent1"])
    if verbose:
        print(str(d["id"]) + ":" + d["name"] + "--" + parent["name"])
    n = Node("Subject", id=d["id"], name=d["name"], level=2)
    r = Relationship(n, "TYPE_OF", parent)
    graph.create(r)
Beispiel #38
0
    if (colorFound == False):
        prod_name = node.properties['product_name']
        color = utilities.searchPrefix(prod_name)
        #Create color node and relationship
        color_node = graph.merge_one('Color', 'Color', color)
        node_rel_dest = Relationship(node, "HAS_COLOR", color_node)
        graph.create_unique(node_rel_dest)
    """

# main

# Create nodes and relationship between category and sub-category

# Start from an empty database on every run.
graph.delete_all()

# Fixed two-level category skeleton; merge_one matches or creates by property.
parent_cat_node = graph.merge_one('Category', 'product_category', 'Mobiles & Tablets')
sub_cat_node = graph.merge_one('Category', 'product_sub_category', 'Mobile Phones')
node_rel_dest = Relationship(sub_cat_node, "SUB_CAT_OF", parent_cat_node)
graph.create_unique(node_rel_dest)

for d in data:
    rec = d['record']
    # Skip records missing the fields needed to identify a product.
    if not rec['product_name'] or not rec['uniq_id']:
        logging.info ("Incomplete product ... skipping")
        logging.debug(rec)
        continue
    else:
        # createProductNode/addNodeProperties are presumably defined earlier
        # in this file; they build the node, then push() persists it.
        node = createProductNode(rec)
        addNodeProperties(node, rec)
        node.push()
    
Beispiel #39
0
    # Authentication
    authenticate("localhost:7474", args.user, args.password)
    db = Graph()

    # Start with empty database
    db.delete_all()

    # Index your data
    db.cypher.execute("CREATE INDEX ON :Beer(name)")
    db.cypher.execute("CREATE INDEX ON :Brewery(name)")
    db.cypher.execute("CREATE INDEX ON :Alcohol(percentage)")
    db.cypher.execute("CREATE INDEX ON :Type(type)")

    # Add nodes + relations
    # `merge_one` will try to match an existing node
    for row in read_rows(args.file, args.delimiter):
        beer = db.merge_one("Beer", "name", row["Merk"])
        brewer = db.merge_one("Brewery", "name", row["Brouwerij"])
        alc = db.merge_one("Alcohol", "percentage", row["Percentage alcohol"])

        # Add node to db (this is kinda slow, you might want to do it in batches)
        db.create(Relationship(beer, "has_alcohol", alc))
        db.create(Relationship(brewer, "brews", beer))

        # Comma seperated
        for t in row["Soort"].split(","):
            btype = db.merge_one("Type", "type", t)
            db.create(Relationship(beer, "is_a", btype))

    print "Done."
                    posts = Relationship(initu, "POSTS", initt)
                    graph.create_unique(posts)
                except Exception, e:
                    #error handler
                    print 'Exception Retweet'
                    pass

        # find REPLY source
        # Check if we have a Reply
        if tweet.in_reply_to_status_id != None:
            # Get Tweet attributes on trpl based on TWEET ID
            try:
                trpl = api.get_status(id=tweet.in_reply_to_status_id)

                # rpl is the Tweet node in which we Reply to
                rpl = graph.merge_one('Tweet', 'ID', trpl.id)

                if hasattr(trpl, 'retweeted_status'):
                    rtcount = 0
                else:
                    rtcount = trpl.retweet_count

                rpl.properties.update({
                    "Date":
                    trpl.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                    "Text":
                    trpl.text.encode('utf8'),
                    "Favourites":
                    trpl.favorite_count,
                    "Retweets":
                    rtcount
Beispiel #41
0
# Export the call graph of the currently loaded IDA binary into Neo4j:
# one Function node per function, CALLS relationships per xref, and the
# binary's filename added as an extra label on every involved node.
neo_instance = "192.168.1.4:7474"
neo_username = "******"
neo_password = "******"

authenticate(neo_instance, neo_username, neo_password)
# Reuse neo_instance instead of repeating the host:port literal.
neo = Graph("http://" + neo_instance + "/db/data")
try:
    neo.schema.create_uniqueness_constraint("Function", "name")
except:
    # Constraint presumably exists from an earlier run; ignore.
    pass

target = idaapi.get_root_filename()
for f in Functions():
    callee_name = GetFunctionName(f)
    callee = neo.merge_one("Function", "name", callee_name)
    if target not in callee.labels:
        callee.labels.add(target)
        callee.push()
    for xref in XrefsTo(f):
        caller_name = GetFunctionName(xref.frm)
        if caller_name == '':
            print("Indirect call to " + callee_name + " ignored.")
            continue
        caller = neo.merge_one("Function", "name", caller_name)
        # Bug fix: the original re-checked and re-pushed `callee` here,
        # so callers from this binary never received the target label.
        if target not in caller.labels:
            caller.labels.add(target)
            caller.push()
        caller_callee = Relationship(caller, "CALLS", callee)
        neo.get_or_create(caller_callee)
print("Export finished")
Beispiel #42
0
class CategoryTree(object):
    """Mirror of the Amazon browse-node category tree in a Neo4j graph.

    Category nodes carry a unique ``id`` of the form
    ``"<country><BrowseNodeId>"`` and are linked parent->child with
    ``HAS_CHILD`` relationships.  ``self.categories`` holds a plain-dict
    snapshot of every known category to cut down on graph round-trips.
    """

    def __init__(self, country):
        """Connect to Neo4j from project config and load the *country* cache."""
        project_conf = get_project_conf()
        neo_host = project_conf.get("NEO4J", "host")
        user = project_conf.get("NEO4J", "username")
        password = project_conf.get("NEO4J", "password")
        # Quieten chatty py2neo / httpstream loggers.
        logging.getLogger("py2neo.batch").setLevel(logging.WARNING)
        logging.getLogger("py2neo.cypher").setLevel(logging.WARNING)
        logging.getLogger("httpstream").setLevel(logging.WARNING)
        authenticate(neo_host, user, password)
        self.graph = Graph("http://%s/db/data/" % neo_host)
        try:
            self.graph.schema.create_uniqueness_constraint("Category", "id")
        except:
            # Constraint already present from an earlier run -- ignore.
            pass
        self.categories = self.get_categories(country)

    def merge_node(self, node, country, do_not_load=False):
        """Merge an Amazon browse node into the graph and the local cache.

        Properties are written only the first time the node is seen
        (detected by the absence of ``name``).  Returns the graph node.
        """
        category_id = "%s%s" % (country, str(node['BrowseNodeId']))
        category = self.graph.merge_one('Category', 'id', category_id)
        if 'name' not in category.properties:
            category['name'] = node['Name']
            category['is_root'] = int(node.get('IsCategoryRoot', 0))
            category['do_not_load'] = bool(do_not_load)
            category['country'] = country
            category.push()

        if category_id not in self.categories:
            self.categories[category_id] = self.category_node_dict(category)

        return category

    def relationship(self, parent, child):
        """Build (without persisting) a parent-[:HAS_CHILD]->child relationship."""
        return Relationship(parent, 'HAS_CHILD', child)

    def relationship_exists(self, parent, child):
        """Return True when parent is already linked to child via HAS_CHILD."""
        matches = self.graph.match(start_node=parent,
                                   end_node=child,
                                   rel_type='HAS_CHILD')
        return len(list(matches)) > 0

    def create_relationship(self, relationship):
        """Persist *relationship* idempotently and push its properties."""
        self.graph.create_unique(relationship)
        relationship.push()

    def create_relationships(self, parent, children):
        """Link *parent* to every node in *children* with HAS_CHILD.

        Bug fix: the original passed ``(parent, child)`` straight to
        ``create_relationship``, which takes a single Relationship object,
        so every call raised TypeError.
        """
        for child in children:
            self.create_relationship(self.relationship(parent, child))

    def add_new_category(self, browsenode, amazon_api, country):
        """Insert *browsenode* (and any missing ancestors) into the tree.

        ``do_not_load`` is inherited from the nearest already-known
        ancestor; every newly added category gets ``shortest_length_root``
        recomputed and is written into the local cache.  Returns the
        cached dict for the new category.

        Expected ``browsenode`` shape (from the Amazon API):
        {u'Ancestors': {u'BrowseNode': {...possibly nested...}},
         u'BrowseNodeId': u'1340509031',
         u'Children': {u'BrowseNode': [{...}, ...]},
         u'Name': u'Mobile Phones & Communication'}
        """
        added_categories = []
        do_not_load = True

        current_browsenode = browsenode
        # Walk up the ancestors until a known category is found and inherit
        # its do_not_load flag; default to True when none is known.
        while 'Ancestors' in current_browsenode:
            current_id = "%s%s" % (country, current_browsenode['BrowseNodeId'])
            current_node = self.categories.get(current_id, None)
            if not current_node:
                if type(current_browsenode['Ancestors']) is dict:
                    current_browsenode = current_browsenode['Ancestors']
                elif type(current_browsenode['Ancestors']) is list:
                    current_browsenode = current_browsenode['Ancestors'][0]
                    # This shouldn't happen. But if it does better to log and continue with the first one
            else:
                do_not_load = bool(current_node['do_not_load'])
                break

        # Create the missing nodes and relationships.
        child = self.merge_node(browsenode, country, do_not_load)
        added_categories.append(child)

        current_browsenode = browsenode
        while 'Ancestors' in current_browsenode and int(current_browsenode.get("IsCategoryRoot", 0)) != 1:
            if type(current_browsenode['Ancestors']) is dict:
                parent_browsenode_id = current_browsenode['Ancestors']['BrowseNode']['BrowseNodeId']
            elif type(current_browsenode['Ancestors']) is list:
                # This shouldn't happen. But if it does better to log and continue with the first one
                parent_browsenode_id = current_browsenode['Ancestors'][0]['BrowseNode']['BrowseNodeId']

            parent_graph_id = "%s%s" % (country, parent_browsenode_id)
            parent_node = self.categories.get(parent_graph_id, None)
            if parent_node:
                # Known parent: link to it and stop climbing.
                # NOTE(review): get_category returns a plain dict, not a graph
                # node -- confirm Relationship accepts it before relying on
                # this branch.
                parent = self.get_category(parent_graph_id)
                relationship = self.relationship(parent, child)
                self.create_relationship(relationship)
                break
            else:
                parent_browsenode = amazon_api.get_node(parent_browsenode_id)
                if type(parent_browsenode) is dict:
                    parent = self.merge_node(parent_browsenode, country,
                                             do_not_load)
                    relationship = self.relationship(parent, child)
                    self.create_relationship(relationship)
                    added_categories.append(parent)
                    current_browsenode = parent_browsenode
                elif parent_browsenode == "AWS.InvalidParameterValue":
                    # Amazon no longer knows this node: prune it locally too.
                    print("Deleting node %s and all its children" % str(parent_browsenode_id))
                    self.delete_category(parent_browsenode_id)
                    break
                else:
                    # self.logger.warning("Unknown error from amazon API.")
                    print('Unknown error from amazon API. %s' % parent_browsenode)
                    break

        # Recompute depth for everything we touched and refresh the cache.
        for category in added_categories:
            category_id = "%s%s" % (country, category['id'])
            length = self.get_shortest_length_to_root(category_id)
            category['shortest_length_root'] = length
            category.push()
            self.categories[category_id] = self.category_node_dict(category)

        new_category_id = "%s%s" % (country, browsenode['BrowseNodeId'])
        return self.categories.get(new_category_id)

    def category_node_dict(self, category_node):
        """Project a category node (or mapping) onto the plain-dict cache shape."""
        result = {
            'is_root': category_node['is_root'],
            'id': category_node['id'],
            'name': category_node['name'],
            'do_not_load': category_node['do_not_load'],
            'shortest_length_root': category_node['shortest_length_root']
        }
        return result

    def get_categories(self, country):
        """Load every Category node for *country* into the cache dict shape."""
        categories = {}
        records = self.graph.find('Category', property_key='country',
                                  property_value=country)
        for category in records:
            categories[category['id']] = self.category_node_dict(category)
        return categories

    def get_category(self, category_id):
        """Return the cache-shaped dict for one category id, or None."""
        category = self.graph.find_one('Category', property_key='id', property_value=category_id)

        if category:
            return self.category_node_dict(category)

    def is_orphan(self, category_id):
        """True when the category is missing or unreachable from any root."""
        category = self.get_category(category_id)
        if not category:
            return True

        if not bool(category['is_root']):
            query = """MATCH p=a-[:HAS_CHILD*]->n
                       WHERE n.id = {id} AND a.is_root=1
                       RETURN p
                       LIMIT 1"""
            cypher = self.graph.cypher
            path = cypher.execute_one(query, id=category_id)
            if not path:
                return True
        return False

    def get_children(self, category_id):
        """Return all (transitive) HAS_CHILD descendants of a category."""
        query = """MATCH (n)-[r:HAS_CHILD*]->(m)
                   WHERE n.id = {id}
                   RETURN m"""
        cypher = self.graph.cypher
        children = cypher.execute(query, id=category_id)
        return children

    def delete_category(self, category_id):
        """Delete a category and all its descendants (with their relationships)."""
        cypher = self.graph.cypher
        children = self.get_children(category_id)

        # Ids are internal "<country><BrowseNodeId>" strings, so direct
        # interpolation is tolerated here; parameters would still be safer.
        delete_query = """
            MATCH (n {id:'%s'})
            OPTIONAL MATCH n-[r]-()
            DELETE n,r
        """
        if children:
            for record in children:
                child = record[0]
                cypher.execute_one(delete_query % child["id"])
        cypher.execute_one(delete_query % category_id)

    def get_shortest_length_to_root(self, category_id):
        """Return the path length from a root to this category (per the query's ordering)."""
        query = """MATCH p=a-[:HAS_CHILD*]->n
                   WHERE n.id={id} AND a.is_root=1
                   RETURN length(p)
                   ORDER BY length(p) DESC
                   LIMIT 1"""
        cypher = self.graph.cypher
        length = cypher.execute_one(query, id=category_id)
        return length
Beispiel #43
0
def update_info_and_links():
    """Refresh every Show node with TVRage/OMDb metadata and attach
    Country, Genre, Season, Episode and Link nodes.

    Streams all shows from the graph, fetches show info from TVRage and
    OMDb (retrying on network errors), then per season/episode fetches
    OMDb episode data and Google Custom Search results for download links.
    """
    print 'updating show info'
    authenticate("localhost:7474", "neo4j", "1234")
    # NOTE(review): GRAPH_CONNECTION_STRNIG (sic) is presumably defined
    # elsewhere in this file -- confirm the misspelled name.
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    results = graph.cypher.stream("match (s:Show) return id(s) as eid,s.id")
    # Raise start_id to resume a partially completed run; 0 processes all.
    start_id = 0
    for record in results:
        if int(record['s.id']) < start_id:
            continue

        node_show = graph.node(record['eid'])

        result_dict = {}

        # Retry until both HTTP fetches succeed.
        # NOTE(review): a persistent network failure loops forever, and a
        # ValueError `continue` also retries indefinitely -- confirm intent.
        success = True
        while success:
            try:
                show_info_e_list = requests.get(
                    'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
                result_dict = xmltodict.parse(show_info_e_list.text)

                omdb_show_info = requests.get(
                    'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
                dict_omdb_show_info = json.loads(omdb_show_info.text)
                if dict_omdb_show_info['Response'] == 'True':
                    # Copy every OMDb field straight onto the show node.
                    for key, value in dict_omdb_show_info.iteritems():
                        node_show[key] = value
                success = False
            except ValueError as e:
                logger.exception("Value error")
                continue
            except Exception as e:
                logger.exception("Some network issue: will try again")
                success = True

        print str(node_show['name'])
        # info

        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()

        #Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')

        node_country = graph.merge_one("Country", 'country', from_country)
        node_country.push()

        show_from_country = Relationship(node_show, "from", node_country)
        graph.create(show_from_country)


        #Genres
        if result_dict['Show'].get('genres', None) is not None:
            # xmltodict yields a bare dict for a single genre and a list
            # for several; normalise to a list.
            genre_list = []
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])

            for genre in genre_list:
                node_genre = graph.merge_one("Genre", 'name', genre)
                node_genre.push()

                show_of_genre = Relationship(node_show, "of genre", node_genre)
                graph.create(show_of_genre)
        """
        try:
            print node_show['started']
            a = node_show['started'].split("/")
            if int(a[len(a)-1]) < 2000:
                continue
        except Exception:
            continue
        """


        #Seasons
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            continue
        # Same single-vs-list normalisation as for genres.
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])

        for season in season_list:
            node_season = Node.cast('Season', {'no': season['@no']})
            graph.create(node_season)

            show_season = Relationship(node_show, "has", node_season)
            graph.create(show_season)

            #Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])
            # count is the 1-based episode number within the season.
            count = 1
            for episode in episode_list:
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                graph.create(node_episode)

                # Same retry pattern as the show-level fetch above.
                success = True
                while success:
                    try:
                        omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                         .format(node_show['name'],
                                                                 node_season['no'],
                                                                 node_episode['epnum']))
                        dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                        if dict_omdb_episode_info['Response'] == 'True':
                            for key, value in dict_omdb_episode_info.iteritems():
                                node_episode[key] = value
                        node_episode.push()

                        success = False
                    except ValueError as e:
                        logger.exception("Value error")
                        continue
                    except Exception as e:
                        logger.exception("Some network issue: will try again")
                        success = True

                try:

                    # e.g. "Show Name s01e02" -- the query sent to Google CSE.
                    search = node_show['name'] + ' s' + str(node_season['no']).zfill(2) + 'e' + str(node_episode['epnum']).zfill(2)

                    print search
                    #links
                    # Rotating set of Google CSE partner ids -- presumably to
                    # spread quota; confirm.
                    search_numbers = [3552639851, 8556419051, 2649486255, 7079685853, 8416818254, 1870757059,
                                      1731156253, 4545021852, 6021755051, 8975221455]

                    for n in search_numbers:
                        links_from_google = requests.get(
                            'https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=10&hl=en&prettyPrint=false&source=gcsc&gss=.com&sig=cb6ef4de1f03dde8c26c6d526f8a1f35&cx=partner-pub-2526982841387487:{1}'
                            '&q={0}&googlehost=www.google.com&oq={0}'.format(search, n))

                        dict_from_google = json.loads(links_from_google.text)
                        for result in dict_from_google['results']:
                            node_link = Node.cast('Link', {
                                'host': result.get('visibleUrl', None),
                                'url': result['url']
                            })
                            graph.create(node_link)
                            link_episode = Relationship(node_episode, "has", node_link)
                            graph.create(link_episode)
                except Exception, err:
                    logger.exception("error grom google part")

                # NOTE(review): the start node here is show_season (a
                # relationship), not node_season -- confirm this is intended.
                show_episode = Relationship(show_season, "has", node_episode)
                graph.create(show_episode)
                count = count + 1
Beispiel #44
0
class ApiProvider():
    """Serves API requests backed by the show graph; can lazily refresh a
    Show node with TVRage/OMDb metadata and Google link results."""

    def __init__(self, request_data):
        # request_data: payload of the API request being served.
        self._request_data = request_data
        authenticate("localhost:7474", "neo4j", "1234")
        # authenticate("52.27.227.159:7474", "neo4j", "1234")
        self.graph = Graph(GRAPH_CONNECTION_STRNIG)


    def _update_show(self, show_id):
        """Populate one Show node (by graph id) with external metadata,
        creating Country/Genre/Season/Episode/Link nodes around it.
        No-op when the node is already marked ``updated``."""
        # get the node from the graph
        node_show = self.graph.node(show_id)
        if node_show['updated'] == True:
            return

        result_dict = {}
        try:
            show_info_e_list = requests.get(
                'http://services.tvrage.com/feeds/full_show_info.php?sid={0}'.format(node_show['id']))
            result_dict = xmltodict.parse(show_info_e_list.text)

            omdb_show_info = requests.get(
                'http://www.omdbapi.com/?t={0}&y=&plot=full&r=json'.format(node_show['name']))
            dict_omdb_show_info = json.loads(omdb_show_info.text)
            if dict_omdb_show_info['Response'] == 'True':
                # Copy every OMDb field straight onto the show node.
                for key, value in dict_omdb_show_info.iteritems():
                    node_show[key] = value
            # NOTE(review): `success` is never read in this method --
            # presumably a leftover from the retry loop elsewhere in
            # this file.
            success = False
        except ValueError as e:
            logger.exception("Value Error")
            return
        except Exception as e:
            logger.exception("Some network issue, will try again")
            return

        # add the new extracted data to the show
        node_show['started'] = result_dict['Show'].get('started', None)
        node_show['ended'] = result_dict['Show'].get('ended', None)
        node_show['image'] = result_dict['Show'].get('image', None)
        node_show['status'] = result_dict['Show'].get('status', None)
        node_show.push()

        # Country
        from_country = result_dict['Show'].get('origin_country', 'unknown')
        node_country = self.graph.merge_one("Country", 'country', from_country)
        node_country.push()

        # add the relation to the graph
        show_from_country = Relationship(node_show, "from", node_country)
        self.graph.create(show_from_country)

        # Genres
        if result_dict['Show'].get('genres', None) is not None:
            # xmltodict yields a bare dict for one genre, a list for many;
            # normalise to a list.
            genre_list = []
            if type(result_dict['Show']['genres']['genre']) is list:
                genre_list = result_dict['Show']['genres']['genre']
            else:
                genre_list.append(result_dict['Show']['genres']['genre'])

            for genre in genre_list:
                # create the genre node
                node_genre = self.graph.merge_one("Genre", 'name', genre)
                node_genre.push()

                # add the Genre relation to the graph
                show_of_genre = Relationship(node_show, "of genre", node_genre)
                self.graph.create(show_of_genre)

        # Seasons
        season_list = []
        if result_dict['Show'].get('Episodelist', None) is None:
            return
        # Same single-vs-list normalisation as for genres.
        if type(result_dict['Show']['Episodelist']['Season']) is list:
            season_list = result_dict['Show']['Episodelist']['Season']
        else:
            season_list.append(result_dict['Show']['Episodelist']['Season'])

        for season in season_list:
            # create node for season
            node_season = Node.cast('Season', {'no': season['@no']})
            self.graph.create(node_season)

            # create the relation n the graph
            show_season = Relationship(node_show, "has", node_season)
            self.graph.create(show_season)

            # Episodes
            episode_list = []
            if type(season['episode']) is list:
                episode_list = season['episode']
            else:
                episode_list.append(season['episode'])

            # count is the 1-based episode number within the season.
            count = 1
            for episode in episode_list:
                # create a node for episode
                node_episode = Node.cast('Episode', {
                    'airdate': episode.get('airdate', None),
                    'epnum': count,
                    'screencap': episode.get('screencap', None),
                    'title': episode.get('title', None)
                })
                self.graph.create(node_episode)

                # add relation to the graph
                # NOTE(review): the start node is show_season (a
                # relationship), not node_season -- confirm intent.
                show_episode = Relationship(show_season, "has", node_episode)
                self.graph.create(show_episode)


                # Episode info
                try:
                    omdb_episode_info = requests.get('http://www.omdbapi.com/?t={0}&Season={1}&Episode={2}'
                                                     .format(node_show['name'],
                                                             node_season['no'],
                                                             node_episode['epnum']))
                    dict_omdb_episode_info = json.loads(omdb_episode_info.text)
                    if dict_omdb_episode_info['Response'] == 'True':
                        for key, value in dict_omdb_episode_info.iteritems():
                            node_episode[key] = value
                    node_episode.push()
                except ValueError as e:
                    logger.exception("Value error")

                except Exception as e:
                    logger.exception("network issue: wil try again")

                # links
                try:
                    # e.g. "Show Name s01e02" -- query sent to Google CSE.
                    search = node_show['name'] + ' s' + str(node_season['no']).zfill(2) + 'e' + str(
                        node_episode['epnum']).zfill(2)

                    # links
                    # Rotating set of Google CSE partner ids -- presumably
                    # to spread quota; confirm.
                    search_numbers = [3552639851, 8556419051, 2649486255, 7079685853, 8416818254, 1870757059,
                                      1731156253, 4545021852, 6021755051, 8975221455]

                    for n in search_numbers:
                        links_from_google = requests.get(
                            'https://www.googleapis.com/customsearch/v1element?key=AIzaSyCVAXiUzRYsML1Pv6RwSG1gunmMikTzQqY&rsz=small&num=10&hl=en&prettyPrint=false&source=gcsc&gss=.com&sig=cb6ef4de1f03dde8c26c6d526f8a1f35&cx=partner-pub-2526982841387487:{1}'
                            '&q={0}&googlehost=www.google.com&oq={0}'.format(search, n))

                        dict_from_google = json.loads(links_from_google.text)
                        for result in dict_from_google['results']:
                            # create node for link
                            node_link = Node.cast('Link', {
                                'host': result.get('visibleUrl', None),
                                'url': result['url']
                            })
                            self.graph.create(node_link)

                            # create the relation in the graph
                            link_episode = Relationship(node_episode, "has", node_link)
                            self.graph.create(link_episode)
                except Exception, err:
                    logger.exception("error grom google part")
                count = count + 1

        # notify that all went OK and finish
        node_show['updated'] = True
        node_show.push()
Beispiel #45
0
 def handle(self, *args, **kwargs):
     """
     One-off sync of the relational data (Django ORM) into Neo4j.

     Walks every Company and, per company, its users, campaigns and
     campaign impressions, merging each entity into the graph by its
     unique key and wiring up the relationships between them.
     Safe to re-run: nodes are created with merge_one and relationships
     with create_unique, so existing data is updated, not duplicated.
     """
     graph = Graph(settings.GRAPH_URL)
     # dropping uniqueness contraints
     # graph.schema.drop_uniqueness_constraint('Company', 'id')
     # graph.schema.drop_uniqueness_constraint('User', 'id')
     # graph.schema.drop_uniqueness_constraint('User', 'email')
     # graph.schema.create_uniqueness_constraint('Visitor', 'key')
     # graph.schema.drop_uniqueness_constraint('Campaign', 'id')
     # graph.schema.drop_uniqueness_constraint('Impression', 'id')
     # graph.schema.drop_uniqueness_constraint('Postal', 'code')
     # graph.schema.drop_uniqueness_constraint('City', 'name')
     # graph.schema.drop_uniqueness_constraint('Country', 'name')
     # create initial labels
     # NOTE(review): these raise if the constraint already exists —
     # presumably this command is meant for a fresh database; confirm
     # before re-running against a populated graph.
     graph.schema.create_uniqueness_constraint('Company', 'id')
     graph.schema.create_uniqueness_constraint('User', 'id')
     graph.schema.create_uniqueness_constraint('User', 'email')
     graph.schema.create_uniqueness_constraint('Visitor', 'key')
     graph.schema.create_uniqueness_constraint('Campaign', 'id')
     graph.schema.create_uniqueness_constraint('Impression', 'id')
     graph.schema.create_uniqueness_constraint('Postal', 'code')
     graph.schema.create_uniqueness_constraint('City', 'name')
     graph.schema.create_uniqueness_constraint('Country', 'name')
     # importing models
     # Imported lazily inside the command so Django app loading is
     # complete before the models are touched.
     from apps.users.models import User, Visitor
     from apps.companies.models import Company
     from apps.campaigns.models import Campaign
     from apps.impressions.models import Impression
     # importing serializers
     from apps.companies.api.serializers import BaseCompanySerializer
     from apps.campaigns.api.serializers import BaseCampaignSerializer
     from apps.impressions.api.serializers import ImpressionSerializer
     from apps.dashboard.serializers import DashboardUserSerializer
     for company in Company.objects.all():
         # ncompany = Node.cast('Company', CompanySerializer(company).data)
         print 'company: %s' %(company.id)
         # Merge on the unique 'id', then copy the serialized fields
         # onto the node and push the properties to the server.
         ncompany = graph.merge_one('Company', 'id', company.id)
         print BaseCompanySerializer(company).data
         ncompany.properties.update(BaseCompanySerializer(company).data)
         graph.push(ncompany)
         for user in company.users.all():
             # Users are merged by email (also unique per constraint above).
             nuser = graph.merge_one('User', 'email', user.email)
             nuser.properties.update(DashboardUserSerializer(user).data)
             graph.push(nuser)
             rel = Relationship.cast(ncompany, 'CompanyUser', nuser)
             graph.create_unique(rel)
         for campaign in  company.campaigns.all():
             print 'campaign: %s' %(campaign.id)
             ncampaign = graph.merge_one('Campaign', 'id', campaign.id)
             ncampaign.properties.update(BaseCampaignSerializer(campaign).data)
             graph.push(ncampaign)
             rel = Relationship.cast(ncompany, 'CompanyCampaign', ncampaign)
             graph.create_unique(rel)
             for impression in campaign.impressions.all():
                 # hydrate_meta presumably yields a dict with keys
                 # 'visitor', 'country', 'city', 'postal_code' — the
                 # nested ifs below only link the levels that are present.
                 meta = impression.hydrate_meta
                 visitor = graph.merge_one('Visitor', 'key', meta['visitor'])
                 if meta['country']:
                     country = graph.merge_one('Country', 'name', meta['country'])
                     graph.create_unique(
                         Relationship.cast(visitor, 'CampaignVisitorCountry', country))
                     graph.create_unique(
                         Relationship.cast(visitor, 'CampaignVisitor', ncampaign))
                     graph.create_unique(
                         Relationship.cast(country, 'CampaignCountry', ncampaign))
                     if meta['city']:
                         city = graph.merge_one('City', 'name', meta['city'])
                         graph.create_unique(
                                 Relationship.cast(city, 'CampaignCity', ncampaign)
                             )
                         # NOTE(review): 'VistitorCity' (and 'VistorPostalCode'
                         # below) look like typos for Visitor*; they are live
                         # relationship type names now, so renaming them would
                         # orphan existing edges — fix only with a migration.
                         graph.create_unique(
                             Relationship.cast(visitor, 'VistitorCity', city))
                         graph.create_unique(
                             Relationship.cast(city, 'CityCountry', country))
                         if meta['postal_code']:
                             postal = graph.merge_one('Postal', 'code', meta['postal_code'])
                             graph.create_unique(
                                 Relationship.cast(postal, 'CityPostalCode', city))
                             graph.create_unique(
                                 Relationship.cast(postal, 'CampaignPostalCode', ncampaign))
                             graph.create_unique(
                                 Relationship.cast(visitor, 'VistorPostalCode', postal))
Beispiel #46
0
def main():
    """
    Seed the graph with demo User nodes from users.csv and link each user
    to a city, a job title, random liked beers/breweries, and finally a
    random set of FOLLOWS relationships between users.

    Requires a reachable Neo4j instance (py2neo ``Graph()`` default URL)
    already populated with Beer and Brewery nodes.
    """
    graph = Graph()
    # Ensure the keys we merge/find on are unique before loading.
    graph.cypher.execute("CREATE CONSTRAINT ON (user:User) ASSERT user.username IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (job:Job) ASSERT job.title IS UNIQUE" )
    graph.cypher.execute("CREATE CONSTRAINT ON (city:City) ASSERT city.name IS UNIQUE" )

    # `with` guarantees the file is closed (the original leaked the handle),
    # and iterating the file object streams lines instead of readlines().
    with open("users.csv", "r") as user_file:
        user_file.readline()  # skip the CSV header row
        for line_number, line in enumerate(user_file):
            print("\r Processing line " + str(line_number), end="")
            # NOTE(review): naive split — assumes no quoted commas in fields.
            fields = line.split(",")
            user = Node("User", username=fields[0],
                        name=fields[1],
                        biography=fields[4],
                        password=bcrypt.encrypt("password"))
            graph.create(user)

            city = graph.merge_one("City", "name", fields[2])
            job = graph.merge_one("Job", "title", fields[3])
            lives_in = Relationship(user, "IS_FROM", city)
            has_job = Relationship(user, "HAS_JOB_TITLE", job)

            graph.create(lives_in)
            graph.create(has_job)

            # Pick 100-599 random beers for this user to like.
            result = graph.cypher.execute("MATCH (beer:Beer) "
                          " RETURN beer, rand() as rand "
                          " ORDER BY rand"
                          " LIMIT {range}", range=random.randrange(100, 600))
            for beer in result:
                beer_node = graph.find_one("Beer", "breweryDbId", beer.beer["breweryDbId"])
                graph.create(Relationship(user, "LIKES", beer_node))

            # Pick 0-9 random breweries for this user to like.
            result = graph.cypher.execute("MATCH (brewery:Brewery) "
                          " RETURN brewery, rand() as rand "
                          " ORDER BY rand"
                          " LIMIT {range}", range=random.randrange(0, 10))
            for brewery in result:
                brewery_node = graph.find_one("Brewery", "breweryDbId", brewery.brewery["breweryDbId"])
                graph.create(Relationship(user, "LIKES", brewery_node))

            # Cap the import at 301 rows (indices 0..300), as before.
            if line_number >= 300:
                break

    # Second pass: make every user follow 5-39 random other users.
    for user in graph.find("User"):
        user_node = graph.find_one("User", "username", user["username"])
        result = graph.cypher.execute("MATCH (user:User) "
                                      "WHERE user.username <> {me}"
          " RETURN user, rand() as rand "
          " ORDER BY rand"
          " LIMIT {range}", me=user_node["username"], range=random.randrange(5, 40))
        for person in result:
            followee = graph.find_one("User", "username", person.user["username"])
            graph.create(Relationship(user_node, "FOLLOWS", followee))
# Connect to the local Neo4j REST endpoint (credentials redacted upstream).
graph_db = Graph("http://*****:*****@localhost:7474/db/data/")

# Load one ingredient name per line; `with` guarantees the file is closed.
filename = 'C:/Users/Gebruiker/Downloads/ingredients.txt'
with open(filename) as f:
    ingredients = [line.strip() for line in f]

print(ingredients)

ingredientnumber = 0
grandtotal = 0
for ingredient in ingredients:
    try:
        IngredientNode = graph_db.merge_one("Ingredient", "Name", ingredient)
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still work;
        # a failed merge just skips this ingredient, as before.
        continue

    ingredientnumber += 1
    # Elasticsearch query body: phrase-match this ingredient in the
    # "ingredients" field. The huge "size" asks for all hits at once.
    searchbody = {
        "size": 99999999,
        "query": {
            "match_phrase": {
                "ingredients": {
                    "query": ingredient,
                }
            }
        }
    }
Beispiel #48
0
akey = ''
asecret = ''
#authorization
auth = tweepy.OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
#wait for rate limits
api = tweepy.API(auth, wait_on_rate_limit=True,
                       wait_on_rate_limit_notify=True)

while(1):
 #find Followers for Tweet User
 #check if we've already explored this user's network (DB)
 users = graph.cypher.execute("MATCH (x:User)-[:POSTS]->(t) WHERE x.Exploration='' RETURN DISTINCT x.Screen_Name")
 for r in users:
    scrname=r[0]
    x=graph.merge_one("User","Screen_Name",scrname)
    print scrname

    try:
        #find Followers for Tweet User
        followers = api.followers_ids(screen_name=scrname)
        for page in paginate(followers, 100):
            results = api.lookup_users(user_ids=page)
            for result in results:
                #Only add relationships between users that already exist in the network because of their tweets (get_tweets.py, get_live_tweets.py)
                mynode = list(graph.find('User',property_key='Screen_Name',
                               property_value=result.screen_name))
                if len(mynode) > 0:
                    # use of merge_one in order to avoid duplicates
                    y=graph.merge_one("User","Screen_Name",result.screen_name.encode('utf8'))
                    y.properties.update({"Name": result.name, "Description": result.description.encode('utf8'),"Location":result.location