Exemple #1
0
        def __init__(self, host, port, username=None, password=None, ssl=False, timeout=None):
            """Open a Bolt session to Neo4j and remember the HTTP endpoint.

            :param host: Neo4j server host name or address.
            :param port: server port; ``"7687"`` is used when ``None``.
            :param username: optional user name for basic authentication.
            :param password: optional password for basic authentication.
            :param ssl: accepted but not used by this constructor (the driver
                is always created with ``encrypted=False``).
            :param timeout: accepted but not used by this constructor.
            """
            port = "7687" if port is None else port
            # The port has to be part of the Bolt URI; without it a
            # non-default port passed by the caller is silently ignored.
            bolt_uri = "bolt://{host}:{port}".format(host=host, port=port)

            self.http_uri = "http://{host}:{port}/db/data/".format(host=host, port=port)

            # Credentials are only sent when both parts are present.
            driver_options = {"encrypted": False}
            if username and password:
                driver_options["auth"] = basic_auth(username, password)
            driver = GraphDatabase.driver(bolt_uri, **driver_options)

            self.session = driver.session()
Exemple #2
0
	def __new__(cls, *args, **kwargs):
		"""
		Return a neo4j-driver or neo4jrestclient object.

		When both ``user`` and ``password`` keyword arguments are supplied
		they are used as credentials: as a basic-auth token when the default
		host is a ``bolt://`` URI, otherwise embedded in an HTTP URL that is
		stored on ``cls.host``.  The created graph object is cached on
		``cls._graph`` and returned.
		"""
		_auth = None
		# ``('user' or 'password') in keys`` only ever tested for 'user' and
		# then crashed with KeyError when 'password' was missing; require both.
		if 'user' in kwargs and 'password' in kwargs:
			user = kwargs['user']
			password = kwargs['password']
			if 'bolt://' in cls._default_host:
				_auth = basic_auth(user, password)
			else:
				cls.host = 'http://{0}:{1}@localhost:7474'.format(user, password)

		if 'bolt://' in cls._default_host:
			driver = Neo4j3.driver(cls._default_host)
			if _auth:
				driver.auth = _auth
			cls._graph = Cypher(driver)
		elif isinstance(cls.host, str):
			# An explicitly configured HTTP host wins over the default one.
			cls._graph = Neo4j2(cls.host)
		else:
			cls._graph = Neo4j2(cls._default_host)

		return cls._graph
Exemple #3
0
def run_graph(neo4j_conf, args):
    """Run one of the predefined graph queries for a search query string.

    :param neo4j_conf: object exposing ``uri``, ``username`` and ``password``.
    :param args: command-line style arguments: one of ``-r/--related``,
        ``-c/--cluster``, ``-s/--similar`` or ``-a/--all`` followed by the
        query text.  When several options are given the last one wins.
    :raises getopt.GetoptError: if the query text or the option is missing,
        or an unknown option is passed.
    """
    opts, args = getopt.getopt(args, "rcsa", ["related", "cluster", "similar", "all"])

    if len(args) < 1:
        raise getopt.GetoptError("Invalid graph arguments")
    query = args[0]

    related = ('match (q:Query)<-[r:RELATED]-(a:Query) where q.query={query}'
               'return a.query, r.norm_weight order by r.norm_weight desc')
    cluster = ('match (q:Query)<-[r:CLUSTER_REP]-(a:Query) where q.query={query}'
               'return r.rank, a.query, r.query_terms order by r.rank')
    similar = ('match (q:Query)-[r:SIMILAR]-(a:Query) where q.query={query}'
               'return a.query, r.score order by r.score desc')
    everything = ('match (q:Query)<-[r]-() where q.query={query}'
                  'return q.query, type(r) as rel_type, count(r) as rel_count')
    statements = {
        "-r": related, "--related": related,
        "-c": cluster, "--cluster": cluster,
        "-s": similar, "--similar": similar,
        "-a": everything, "--all": everything,
    }

    stmt = ''
    for o, v in opts:
        stmt = statements.get(o, stmt)

    if not stmt:
        raise getopt.GetoptError("Invalid graph arguments")

    graph = GraphDatabase.driver(neo4j_conf.uri, auth=basic_auth(neo4j_conf.username, neo4j_conf.password))
    try:
        session = graph.session()
        try:
            rs = session.run(stmt, parameters={'query': query})
            for r in rs:
                pprint.pprint(r)
        finally:
            # Release the connection even when the query or printing fails.
            session.close()
    finally:
        graph.close()
Exemple #4
0
    def connect(self, url=None, user=None, password=None, **kw):
        """
        Parse a Neo4J URL and attempt to connect using Bolt

        Note: If the user and password arguments are provided, they
        will only be used in case no auth information is provided as
        part of the connection URL.

        :param url: connection URL, ``bolt://localhost`` when omitted; may
            carry credentials as ``bolt://user:password@host``.
        :param user: fallback user name.
        :param password: fallback password (also used when the URL names a
            user but no password).
        :param kw: extra keyword arguments forwarded to
            ``GraphDatabase.driver``; ``auth`` is always overwritten.
        """
        if url is None:
            url = 'bolt://localhost'
        if user is None:
            user = '******'
        if password is None:
            password = '******'

        try:
            protocol, url = url.split('://')
            if protocol.lower() != 'bolt':
                warnings.warn('Switching protocols. Only Bolt is supported.')
        except ValueError:
            # No (or more than one) scheme separator: treat url as host part.
            pass

        # Split on the LAST '@' so that passwords containing '@' are not
        # mistaken for "no credentials in URL".
        credentials, at_sign, host_part = url.rpartition('@')
        if at_sign:
            url = host_part
            # Split on the FIRST ':' so that passwords may contain ':'.
            url_user, colon, url_password = credentials.partition(':')
            if not colon:
                url_password = password
            kw['auth'] = basic_auth(url_user, url_password)
        else:
            kw['auth'] = basic_auth(user, password)

        self.driver = GraphDatabase.driver('bolt://%s' % url, **kw)
def export_to_neo4j():
    """Export stored articles to a local Neo4j instance as ``Article`` nodes.

    For every article that has links, each linked article is looked up and
    its title printed, and an ``Article`` node named after the *source*
    article is created once per link.  The session is always closed, even
    if a lookup or query fails.
    """
    driver = GraphDatabase.driver("bolt://localhost:7687",
                                  encrypted=False,
                                  auth=basic_auth("neo4j", "asdzxc"))
    session = driver.session()

    try:
        for article in db.Article.objects.all():
            if not article['links']:
                continue

            for link in article['links']:
                to_article = db.Article.objects.get(id=link)
                print(to_article['title'])
                # NOTE(review): this creates a node for the source article
                # once per link and never uses `to_article` in the query -
                # confirm whether a relationship to `to_article` was intended.
                session.run("CREATE (a:Article {name: {name}})",
                            {"name": article['title']})
    finally:
        session.close()
Exemple #6
0
def main():
    """Insert the resources of a Terraform state file into Neo4j.

    Command-line arguments: ``-d/--db`` (Neo4j host), ``-u/--username``,
    ``-p/--password`` and the positional ``state_file`` path.
    """
    parser = argparse.ArgumentParser(description="""
        Insert a Terraform state file into neo4j
    """)
    parser.add_argument('-d','--db', required=True, help="Neo4j host")
    parser.add_argument('-u','--username', required=True, help="Neo4j user")
    parser.add_argument('-p','--password', required=True, help="Neo4j password")
    parser.add_argument('state_file', help="Terraform state file")
    args = parser.parse_args()

    # NOTE(review): this echoes the parsed arguments, including the password.
    print(args)
    with open(args.state_file, 'r') as f:
        state = json.load(f)

    driver = GraphDatabase.driver("bolt://{}".format(args.db),
        auth=basic_auth(args.username, args.password))
    session = driver.session()

    try:
        # Flatten the resources of all modules into a single list.  Unlike
        # reduce() without an initial value this also copes with a state
        # file that has no modules at all.
        resources = [resource
                     for module in state['modules']
                     for resource in module['resources'].values()]

        # Run actions for resources and capture hooks (deduplicated).
        hooks = set()
        for resource in resources:
            hooks.add(insert_item(resource, session))

        # Run hooks; insert_item may return a falsy value for "no hook".
        for hook in hooks:
            if hook:
                hook(session)
    finally:
        session.close()
Exemple #7
0
def test_construct_dwpc_query():
    """
    Build the DWPC query for the CbGpPWpGaD metapath and check the value
    computed by the public Hetionet Neo4j instance for Bupropion and
    nicotine dependency. Metapath taken from
    https://doi.org/10.1371/journal.pcbi.1004259.g002
    """
    metagraph_path = pathlib.Path(__file__).parent.absolute().joinpath(
        'data/hetionet-v1.0-metagraph.json')
    metagraph = hetio.readwrite.read_metagraph(metagraph_path)
    metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')

    query = hetio.neo4j.construct_dwpc_query(
        metapath, property='identifier', unique_nodes=True)
    assert len(query) > 0

    driver = GraphDatabase.driver("bolt://neo4j.het.io")
    params = dict(
        source='DB01156',        # Bupropion
        target='DOID:0050742',   # nicotine dependency
        w=0.4,                   # damping exponent
    )
    with driver.session() as session:
        record = session.run(query, params).single()
        assert record

    assert record['DWPC'] == pytest.approx(0.03287590886921623)
def main(argv=None):
    """Import all data in JSON file into Neo4j database.

    :param argv: argument list; ``sys.argv[1:]`` is used when empty or None.
    :raises ValueError: if there are commands to execute but
        ``DATABASE_URL`` or ``DATABASE_PASS`` is missing from the
        environment (``DATABASE_USER`` is optional and defaults to "").

    With ``--no-execute`` the generated commands are written to stdout as
    JSON instead of being sent to the database.
    """
    parser = argparse.ArgumentParser(description="Load articles into Neo4j")
    parser.add_argument("file",
                        help="File to read",
                        type=str,
                        nargs="?",
                        metavar="FILE")
    parser.add_argument("--no-execute",
                        action="store_true")
    parse_result = parser.parse_args(argv or sys.argv[1:])

    with open_or_default(parse_result.file, sys.stdin) as fileobj:
        data = json.load(fileobj)
        commands = list(commands_from_data(data))

    if parse_result.no_execute:
        sys.stdout.write(json.dumps(commands))
        return
    if not commands:
        return

    if not all(var in os.environ for
               var in ["DATABASE_URL", "DATABASE_PASS"]):
        raise ValueError("Ensure environment variables DATABASE_URL, "
                         "DATABASE_PASS and DATABASE_USER set.")
    url = os.environ["DATABASE_URL"]
    pwd = os.environ["DATABASE_PASS"]
    usr = os.environ.get("DATABASE_USER", "")

    driver = GraphDatabase.driver(url, auth=basic_auth(usr, pwd))
    session = driver.session()
    try:
        for command in commands:
            session.run(command)
    finally:
        # Close the session even when one of the commands fails.
        session.close()
Exemple #9
0
def get_session(warehouse_home, server_name, password=None,
                encrypted=DEFAULT_ENCRYPTED,
                silence_loggers=DEFAULT_SILENCE_LOGGERS):
    """Return a new Bolt session for a server managed by a neokit warehouse.

    The Bolt address is read from the server's
    ``dbms.connector.bolt.address`` setting (``localhost:7687`` if unset).
    Credentials for ``DEFAULT_USER`` are sent only when ``password`` is
    truthy.  With ``silence_loggers`` the ``neo4j.bolt`` logger is raised
    to WARNING level first.
    """
    if silence_loggers:
        logging.getLogger('neo4j.bolt').setLevel(logging.WARNING)

    warehouse_server = neokit.Warehouse(warehouse_home).get(server_name)
    server_url = 'bolt://' + warehouse_server.config(
        'dbms.connector.bolt.address', 'localhost:7687')

    driver_options = {'encrypted': encrypted}
    if password:
        driver_options['auth'] = basic_auth(DEFAULT_USER, password)

    return GraphDatabase.driver(server_url, **driver_options).session()
Exemple #10
0
    def __init__(self):
        """Create the Neo4j driver from the credentials in ``config.ini``.

        Reads ``user_name``, ``password`` and ``bolt_host`` from the
        ``neo4j credentials`` section of ``config.ini`` in the current
        working directory.
        """
        parser = configparser.ConfigParser()
        parser.read('config.ini')

        section = 'neo4j credentials'
        user_name, password, bolt_host = [
            parser.get(section, option)
            for option in ('user_name', 'password', 'bolt_host')
        ]

        credentials = basic_auth(user_name, password)
        self.driver = GraphDatabase.driver(bolt_host, auth=credentials)
Exemple #11
0
def init_neo4j_connection(app):
    """Create a Neo4j driver from ``app.config`` and store it there.

    Reads ``NEO4J_URL`` (default ``bolt://localhost:7687``),
    ``NEO4J_ENCRYPTED`` (default True), ``NEO4J_USER`` (default ``neo4j``)
    and ``NEO4J_PASSWORD``.  Authentication is only configured when a
    password is set.  The driver is stored under ``NEO4J_DRIVER``.
    """
    settings = app.config
    server_url = settings.get('NEO4J_URL', 'bolt://localhost:7687')
    encrypted = settings.get('NEO4J_ENCRYPTED', True)
    user = settings.get('NEO4J_USER', 'neo4j')
    password = settings.get('NEO4J_PASSWORD')

    if password:
        auth = basic_auth(user, password)
    else:
        auth = None

    settings['NEO4J_DRIVER'] = GraphDatabase.driver(
        server_url, encrypted=encrypted, auth=auth)
Exemple #12
0
def server(ctx, host, port, debug):
    """Run the web application with a Neo4j driver attached to it.

    The driver is built from ``ctx.obj.config.neo4j`` (address, user,
    password) and exposed to the app, together with the configuration,
    as ``app.minos``.
    """
    from . server import app
    settings = ctx.obj.config

    from neo4j.v1 import GraphDatabase, basic_auth
    neo4j_settings = settings.neo4j
    credentials = basic_auth(neo4j_settings.user, neo4j_settings.password)
    neo4j_driver = GraphDatabase.driver(settings.neo4j.address, auth=credentials)

    from attrdict import AttrDict
    app.minos = AttrDict({ 'config': settings, 'driver': neo4j_driver })
    app.run(host, port, debug)
Exemple #13
0
 def __init__(self, **kwargs):
     """Open a Bolt session from keyword connection settings.

     Required keyword arguments: ``db_addr`` (host), ``db_port`` (port, as
     an integer or a numeric string), ``username`` and ``password``.

     :raises KeyError: if one of the required keyword arguments is missing.
     :raises ValueError: if ``db_port`` is not numeric.
     """
     #super(Neo4JConn, self).__init__()
     host = kwargs['db_addr']
     # Ports read from config files or the command line arrive as strings;
     # "%d" would raise TypeError for them, so normalise to int first.
     port = int(kwargs['db_port'])
     user = kwargs['username']
     password = kwargs['password']

     driver = GraphDatabase.driver(
             "bolt://%s:%d" % (host, port),
             auth=basic_auth(user, password))
     self.__session = driver.session()
def neo4j():
    """Smoke-test a local Neo4j instance over Bolt.

    Creates a ``Person`` node for Arthur, reads it back and prints its
    title and name.  The session is closed even if a query fails.
    """
    from neo4j.v1 import GraphDatabase, basic_auth

    # Bolt listens on 7687 by default; 7474 is Neo4j's HTTP port and does
    # not answer the Bolt handshake.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "neo4j"))
    session = driver.session()

    try:
        session.run("CREATE (a:Person {name:'Arthur', title:'King'})")

        result = session.run("MATCH (a:Person) WHERE a.name = 'Arthur' RETURN a.name AS name, a.title AS title")
        for record in result:
            print("%s %s" % (record["title"], record["name"]))
    finally:
        session.close()
Exemple #15
0
    def set_connection(self, url):
        """Create the Bolt driver from a URL that carries the credentials.

        :param url: ``bolt://user:password@host[:port]``.  The password may
            contain ``:`` and ``@`` characters.
        :raises ValueError: if the scheme is not ``bolt`` or the URL has no
            ``user:password@`` part.
        """
        self.url = url
        u = urlparse(url)

        # Host is everything after the LAST '@'; the user name ends at the
        # FIRST ':' so the password itself may contain either character.
        credentials, at_sign, hostname = u.netloc.rpartition('@')
        username, colon, password = credentials.partition(':')

        if u.scheme != 'bolt' or not at_sign or not colon:
            raise ValueError("Expecting url format: bolt://user:password@localhost:7687"
                             " got {}".format(url))

        self.driver = GraphDatabase.driver('bolt://' + hostname,
                                           auth=basic_auth(username, password))
        self.refresh_connection()
Exemple #16
0
    def __init__(self, adress, user, password):
        """
        Open a driver and a session to the database.

        Prints a status line; if opening the session raises ProtocolError
        the interpreter is terminated via ``exit()``.
        """
        credentials = basic_auth(user, password)
        self.driver = GraphDatabase.driver(adress, auth=credentials)

        try:
            self.session = self.driver.session()
        except ProtocolError:
            print("Cannot connect to neo4j. Aborting.")
            exit()
        else:
            print("Connected to neo4j.")
  def __init__(self, username = None, password = None, server = None):
    """Connect to Neo4j and open a session.

    When ``username`` or ``password`` is missing, all three values are
    taken from ``self.loadAuthCredentials()``.

    :raises Exception: wrapping the message of ``ServiceUnavailable`` when
        the server cannot be reached.
    """
    if username is None or password is None:
        username, password, server = self.loadAuthCredentials()

    uri = "bolt://{}:{}@{}".format(username, password, server)

    # Log where we connect to, but never echo the password.
    print("Connecting to bolt://{}@{}".format(username, server))

    try:
      self.conn = GraphDatabase.driver(uri, auth = (username, password))
      self.session = self.conn.session()
    except ServiceUnavailable as e:
      raise Exception(str(e))
Exemple #18
0
def create_app():
    """Build the Flask application and its JWT manager.

    Configures the app from the module-level ``config`` mapping, opens a
    Neo4j driver and session, registers all API blueprints and initialises
    socket.io.

    :return: ``(app, jwt)`` tuple.
    """
    app = Flask(__name__)
    app.debug = True
    app.config.update({
        'SECRET_KEY': config['auth_secret'],
        'JWT_BLACKLIST_ENABLED': False,
        'JWT_BLACKLIST_STORE': simplekv.memory.DictStore(),
        'JWT_BLACKLIST_TOKEN_CHECKS': 'all',
        'JWT_ACCESS_TOKEN_EXPIRES': datetime.timedelta(minutes=15),
        'UPLOAD_FOLDER': UPLOAD_FOLDER,
    })

    credentials = basic_auth(config['database_user'], config['database_pass'])
    driver = GraphDatabase.driver(config['database_url'], auth=credentials)
    db_session = driver.session()

    # start jwt service
    jwt = JWTManager(app)

    # Import blueprints (done here rather than at module import time)
    from auth import auth_blueprint
    from banner import banner_blueprint
    from people import people_blueprint
    from organizations import organizations_blueprint
    from repos import repositories_blueprint
    from schema import schema_blueprint
    from data import data_blueprint
    from search import search_blueprint
    from upload import upload_blueprint
    from export import export_blueprint
    from list import list_blueprint
    from .sockets import sockets as socket_blueprint

    # register API modules, in this exact order
    api_blueprints = (
        banner_blueprint,
        auth_blueprint,
        people_blueprint,
        organizations_blueprint,
        repositories_blueprint,
        schema_blueprint,
        search_blueprint,
        data_blueprint,
        upload_blueprint,
        socket_blueprint,
        export_blueprint,
        list_blueprint,
    )
    for blueprint in api_blueprints:
        app.register_blueprint(blueprint)

    x_socketio.init_app(app)
    return app, jwt
    def __init__(self, authfile, session_name):
        """Set up bookkeeping state and a fresh Neo4j session.

        :param authfile: path to a text file with exactly three lines:
            user name, password and database address.
        :param session_name: node label of this session; all nodes carrying
            this label are deleted from the database on start-up.
        """
        self._authfile = authfile
        self._session_name = session_name
        self._num_variables = 0
        self._num_landmarks = 50
        self._pose_to_odometry_or_prior = {}
        self._pose_to_measurements = {}
        self._landmark_to_prior = {}
        self._pose_ids = []
        self._max_factor_id = 0

        # initialize Neo4j session; the context manager closes the auth file
        with open(self._authfile) as auth_fh:
            self.username, self.password, self.DB_address = auth_fh.read().splitlines()
        # Report who connects where, but keep the password out of the output.
        print("%s %s" % (self.username, self.DB_address))
        self.driver = GraphDatabase.driver(self.DB_address, auth=basic_auth(self.username, self.password))
        self.session = self.driver.session()
        # Labels cannot be passed as query parameters, hence the
        # concatenation; session_name must be a trusted identifier.
        self.session.run("MATCH (n:" + self._session_name + ") DETACH DELETE n")
Exemple #20
0
    def __init__(self, driver=None, uri=None,
                 user=None, password=None,
                 node_label="node",
                 edge_label="edge",
                 unique_node_ids=True):
        """Initialize Neo4jGraph object.

        Parameters
        ----------
        driver : neo4j.v1.direct.DirectDriver, optional
            Driver providing connection to a Neo4j database
        uri : str, optional
            Uri for a new Neo4j database connection (bolt)
        user : str, optional
            Username for the Neo4j database connection
        password : str, optional
            Password for the Neo4j database connection
        node_label : optional
            Label of nodes inducing the subgraph to scope.
            By default `"node"`.
        edge_label : optional
            Type of relations inducing the subgraph to scope.
            By default `"edge"`.
        unique_node_ids : bool, optional
            Flag, if True the uniqueness constraint on the property
            'id' of nodes is imposed, by default True

        If database driver is provided, uses it for
        connecting to database, otherwise creates
        a new driver object using provided credentials.
        A failure to create the uniqueness constraint is reported
        as a warning and does not abort initialization.
        """
        if driver is None:
            self._driver = GraphDatabase.driver(
                uri, auth=(user, password))
        else:
            self._driver = driver

        self._node_label = node_label
        self._edge_label = edge_label
        self.unique_node_ids = unique_node_ids
        if unique_node_ids:
            try:
                self.set_constraint('id')
            except Exception:
                # Best effort only, but do not swallow KeyboardInterrupt or
                # SystemExit the way a bare ``except:`` would.
                warnings.warn(
                    "Failed to create id uniqueness constraint")
Exemple #21
0
    def set_connection(self, url):
        """Create the Bolt driver from a URL that carries the credentials.

        :param url: ``bolt://user:password@host[:port]``.  The password may
            contain ``:`` and ``@`` characters.
        :raises ValueError: if the scheme is not ``bolt`` or the URL has no
            ``user:password@`` part.

        Also records the current process id and resets the active
        transaction.
        """
        self.url = url
        u = urlparse(url)

        # Host is everything after the LAST '@'; the user name ends at the
        # FIRST ':' so the password itself may contain either character.
        credentials, at_sign, hostname = u.netloc.rpartition('@')
        username, colon, password = credentials.partition(':')

        if u.scheme != 'bolt' or not at_sign or not colon:
            raise ValueError("Expecting url format: bolt://user:password@localhost:7687"
                             " got {}".format(url))

        self.driver = GraphDatabase.driver('bolt://' + hostname,
                                           auth=basic_auth(username, password),
                                           encrypted=config.ENCRYPTED_CONNECTION,
                                           max_pool_size=config.MAX_POOL_SIZE)
        self._pid = os.getpid()
        self._active_transaction = None
    def __init__(self, settings_file_name = None, working_directory = None):
        """Initialise the service: base class, Neo4j driver and proxies.

        The Neo4j user name and password are read from the JSON file named
        by the ``secret_data_file_name`` setting, resolved relative to the
        working directory.
        """
        super().__init__(settings_file_name, working_directory = working_directory)

        # Load the secrets (JSON) that hold the database credentials
        secret_name = self.get_setting("secret_data_file_name")
        secret_path = os.path.join(self.working_directory, os.path.normpath(secret_name))
        with open(secret_path, "r") as secret_file:
            secret_data = json.load(secret_file)

        # Connect to the local graph database
        credentials = basic_auth(secret_data["neo4j_user_name"], secret_data["neo4j_password"])
        self._db = GraphDatabase.driver("bolt://localhost", auth=credentials)
        self.orion_ns = "http://www.orion-research.se/ontology#"

        # Proxy to the authentication service named in the settings
        self.authentication_proxy = self.create_proxy(self.get_setting("authentication_service"))

        self.ontology = None
def _get_cand_spec(mass, tol):
    '''Gets candidate spectra.

    Returns a list of ``[chemical, spectrum]`` pairs for every chemical
    whose monoisotopic mass lies strictly between ``mass - tol`` and
    ``mass + tol``.  Session and driver are released before returning.
    '''
    query = 'MATCH (s:Spectrum)-[]-(c:Chemical)' + \
        ' WHERE c.monoisotopic_mass > {start_mass}' + \
        ' AND c.monoisotopic_mass < {end_mass}' + \
        ' RETURN c, s'

    driver = GraphDatabase.driver("bolt://localhost")
    try:
        with driver.session() as session:
            result = session.run(query, {'start_mass': mass - tol,
                                         'end_mass': mass + tol})
            # Materialise the records while the session is still open.
            return [[record['c'], record['s']] for record in result]
    finally:
        driver.close()
Exemple #24
0
def setup_gdatabase_conn():
    """Function to setup the database connection to the active Neo4j project meant to contain the
    ODIN data.

    Reads ``uri``, ``username`` and ``password`` from the "GraphDatabase"
    config section and returns the created driver.  On any failure an error
    message is shown and the program exits.
    """
    try:
        db_config = config_section_map("GraphDatabase")
        database_uri = db_config["uri"]
        database_user = db_config["username"]
        database_pass = db_config["password"]
        # Show who connects where, but never echo the password.
        click.secho("[*] Attempting to connect to your Neo4j project using {} @ {}."
                    .format(database_user,database_uri),fg="yellow")
        neo4j_driver = GraphDatabase.driver(database_uri,auth=(database_user,database_pass))
        click.secho("[+] Success!",fg="green")
        return neo4j_driver
    except Exception:
        click.secho("[!] Could not create a database connection using the details provided in \
your database.config! Please check the URI, username, and password. Also, make sure your Neo4j \
project is running. Note that the bolt port can change.",fg="red")
        exit()
 def _get_db_driver(uri, username=None, password=None, encrypted=True, max_pool_size=50, trust=TRUST_DEFAULT):
     """
     Build a Neo4j driver using basic authentication.

     :param uri: Bolt uri
     :type uri: str
     :param username: Neo4j username
     :type username: str
     :param password: Neo4j password
     :type password: str
     :param encrypted: Use TLS
     :type encrypted: Boolean
     :param max_pool_size: Maximum number of idle sessions
     :type max_pool_size: Integer
     :param trust: Trust cert on first use (0) or do not accept unknown cert (1)
     :type trust: Integer
     :return: Neo4j driver
     :rtype: neo4j.v1.session.Driver
     """
     auth_token = basic_auth(username, password)
     driver_options = dict(encrypted=encrypted, max_pool_size=max_pool_size, trust=trust)
     return GraphDatabase.driver(uri, auth=auth_token, **driver_options)
    def handle(self, *args, **options):
        """Load the GitHub graph of every known user into Neo4j.

        For each user name (except ``admin``) the GitHub user is merged
        into the graph, followed by the user's owned, starred and
        contributed repositories and the matching relationships.  Errors
        for a single user are logged and do not stop the run.  Existing
        data is kept because the helpers use MERGE/SET statements.
        """
        bolt_url = settings.NEO4J_BOLT_URL
        credentials = basic_auth(settings.NEO4J_USER, settings.NEO4J_PASSWORD)
        session = GraphDatabase.driver(bolt_url, auth=credentials).session()

        usernames = [row[0] for row in User.objects.all().values_list('username')]

        # GitHub repository list type -> relationship type in the graph
        relationship_by_repo_type = {
            'repositories': 'OWNER',
            'starredRepositories': 'STARRED',
            'contributedRepositories': "CONTRIBUTED"
        }

        for username in usernames:
            if username != u'admin':
                try:
                    gh_user = ghUser.get(login=username)['user']
                    neo4j_merge_user(gh_user, session)
                    for repo_type, relationship in relationship_by_repo_type.items():
                        for repo in RepoList(type=repo_type, login=username):
                            repo_node = repo['node']
                            neo4j_merge_repo(repo_node, session)
                            neo4j_match_repo_relationship(
                                gh_user['id'], repo_node['id'],
                                relationship, session
                            )
                except Exception as e:
                    logger.error("load_user_github_graph, error: %s" % e)

        session.close()
Exemple #27
0
    def __init__(self, sessname):
        """Connect to Neo4j and MongoDB and reset this session's nodes.

        :param sessname: node label of this session; all nodes carrying
            this label are deleted from Neo4j on start-up.
        """
        self.idx_ = 0 # odom_index
        self.sessname = sessname
        ## Authentication and Setup for Neo4j
        # The auth file must hold exactly three lines: user name, password
        # and database address.
        authfile = '/home/dehann/neo_authfile.txt'
        with open(authfile) as auth_fh:
            un, pw, addr = auth_fh.read().splitlines()
        self.driver = GraphDatabase.driver(addr, auth=basic_auth(un, pw))
        self.session = self.driver.session()
        # Labels cannot be passed as query parameters, hence the
        # concatenation; sessname must be a trusted identifier.
        self.session.run("MATCH (n:"+self.sessname+") DETACH DELETE n") # REMOVE ALL "+self.sessname+" NODES
        self.odom_diff = None
        self.old_odom = None
        self.odom_node_id = None # neo4j node id

        ## Authentication/Setup for Mongo
        mongo_authfile = "/home/dehann/mongo_authfile.txt"
        with open(mongo_authfile) as mongo_fh:
            maddr = mongo_fh.read().splitlines()
        print(maddr)
        # NOTE(review): maddr is the list of lines of the file - confirm this
        # is the form MongoClient is meant to receive.
        client = MongoClient(maddr) # Default is local for now
        self.db = client.CloudGraphs # test is the name of the data base
Exemple #28
0
def get_db_client(dbhost, dbuser, dbpass, bolt=False):
    """Return a Neo4j DB session.

    With ``bolt=True`` a Bolt driver session is returned (the process exits
    with status 1 if it cannot be created); otherwise a py2neo ``Graph``.
    """
    if verbose > 4:
        print("DB Creds", dbhost, dbuser, dbpass)

    if not bolt:
        # py2neo connection
        return Graph("******".format(dbuser, dbpass, dbhost))

    target = "bolt://" + dbhost
    token = basic_auth(dbuser, dbpass)
    try:
        return GraphDatabase.driver(target, auth=token, max_pool_size=5).session()
    except Exception as e:
        print("Database connection/authentication error:", e)
        sys.exit(1)
Exemple #29
0
def create_database(infile):
    """Load a '|'-separated PCAP export into Neo4j.

    The file is fetched by the database from
    ``http://localhost:8080/<infile>``.  MAC addresses become ``Node``
    nodes, IP addresses become ``IP`` nodes, ``HAS_MAC`` links an IP to its
    MAC and ``TALKS_TO`` links source to destination MAC for every frame.

    :param infile: name of the CSV file below the local HTTP server root.
    """
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "password1!"))
    session = driver.session()

    print("Infile from ingester.py: %s" % infile)

    # Every statement streams the same CSV; only the part after
    # "WITH row WHERE" differs.
    load_rows = ("USING PERIODIC COMMIT LOAD CSV WITH HEADERS FROM 'http://localhost:8080/" + infile +
                 "' AS row FIELDTERMINATOR '|' WITH row WHERE ")

    statements = (
        # unique source MAC address nodes
        "row.`eth.src` IS NOT NULL MERGE (n:Node {eth: row.`eth.src`}) ON CREATE SET n.eth=row.`eth.src`",
        # unique source IP address nodes
        "row.`ip.src` IS NOT NULL MERGE (n:IP {ip: row.`ip.src`}) ON CREATE SET n.ip=row.`ip.src`",
        # unique destination MAC address nodes
        "row.`eth.dst` IS NOT NULL MERGE (n:Node {eth: row.`eth.dst`}) ON CREATE SET n.eth=row.`eth.dst`",
        # unique destination IP address nodes
        "row.`ip.dst` IS NOT NULL MERGE (n:IP {ip: row.`ip.dst`}) ON CREATE SET n.ip=row.`ip.dst`",
        # relationship between source MAC address and source IP address
        "row.`eth.src` IS NOT NULL AND row.`ip.src` IS NOT NULL MATCH (n:Node) MATCH (m:IP) "
        "WHERE n.eth=row.`eth.src` AND m.ip=row.`ip.src` MERGE (m)-[:HAS_MAC]->(n)",
        # relationship between destination MAC address and destination IP
        # address; the NULL guard must test the destination columns (it
        # used to test the source columns, a copy-paste slip)
        "row.`eth.dst` IS NOT NULL AND row.`ip.dst` IS NOT NULL MATCH (n:Node) MATCH (m:IP) "
        "WHERE n.eth=row.`eth.dst` AND m.ip=row.`ip.dst` MERGE (m)-[:HAS_MAC]->(n)",
        # relationship between source and destination nodes
        "row.`eth.src` IS NOT NULL AND row.`eth.dst` IS NOT NULL MATCH (n:Node) WHERE n.eth=row.`eth.src` "
        "MATCH (m:Node) WHERE m.eth=row.`eth.dst` CREATE (n)-[:TALKS_TO {protocol: row.`_ws.col.Protocol`, "
        "info: row.`_ws.col.Info`, data: row.`data`, length: row.`frame.len`, srcport: row.`tcp.srcport`, "
        "dstport: row.`tcp.dstport`}]->(m)",
    )

    try:
        for statement in statements:
            session.run(load_rows + statement)
    finally:
        # Close the session even if one of the imports fails.
        session.close()
Exemple #30
0
    password="******"
)
# Create nodes
node1 = Node('Customer', name='John',age=18,phone=2232)
node2 = Node('Customer', name='Lily',age=22,phone=9921)
node3 = Node('Customer', name='Cathy',age=52,phone=7100)
test_graph.create(node1)
test_graph.create(node2)
test_graph.create(node3)
# Create nodes (2): one Person node per DataFrame row
# NOTE(review): `graph` is not defined in the visible part of this script
# (the other calls use `test_graph`) - confirm it exists.
arr = np.array([['John','Lily','Ben','Mark'],['189101','234220','019018','330682'],[11,23,56,28]])
df = pd.DataFrame(arr.transpose(),columns=['name','phone_no','age'])
for i, j, k in df.values:
    node1 = Node('Person',name=i,phone_no=j,age=k)
    graph.create(node1)
# neo4j.v1 operations
driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "z123456789"))
session = driver.session()
# Create nodes (3): same data, merged through a single UNWIND query
arr = np.array([['John','Lily','Ben','Mark'],['189101','234220','019018','330682'],[11,23,56,28]])
df = pd.DataFrame(arr.transpose(),columns=['name','phone_no','age'])
#    name   phone_no    age
# 0  John   189101      11
# 1  Lily   234220      23
# 2  Ben    019018      56
# 3  Mark   330682      28
# DataFrame-to-dict conversion: one dict per row, passed as the `events` parameter
dic = {'events':df.to_dict('records')}
session.run("unwind {events} as event merge (n:Person{name:event.name,phone_no2:event.phone_no,age: event.age})",dic)
# Delete all nodes and edges
test_graph.delete_all()
Exemple #31
0
def neo4j_edge_summary(config, address, username, password, output=None):
    """Report how often each pair of node categories is connected.

    Collects every distinct node category, then counts for each ordered
    pair of categories the relationships between them, grouped by edge
    type and id prefix.  The report is echoed, or written to ``output`` as
    a '|'-separated CSV file.

    :param config: not used by this function.
    :param address: Bolt URI of the database.
    :param username: database user name.
    :param password: database password.
    :param output: optional path of the report file.
    """
    if output is not None and not is_writable(output):
        error(f'Cannot write to {output}')

    bolt_driver = GraphDatabase.driver(address, auth=(username, password))

    query = """
    MATCH (x) RETURN DISTINCT x.category AS category
    """

    categories = set()

    with bolt_driver.session() as session:
        # Read the records while the session that produced them is open.
        for record in session.run(query):
            category = record['category']
            if isinstance(category, str):
                categories.add(category)
            elif isinstance(category, (list, set, tuple)):
                categories.update(category)
            elif category is None:
                continue
            else:
                error('Unrecognized value for node.category: {}'.format(category))

    categories = list(categories)

    query = """
    MATCH (n)-[r]-(m)
    WHERE
        (n.category = {category1} OR {category1} IN n.category) AND
        (m.category = {category2} OR {category2} IN m.category)
    RETURN DISTINCT
        {category1} AS subject_category,
        {category2} AS object_category,
        type(r) AS edge_type,
        split(n.id, ':')[0] AS subject_prefix,
        split(m.id, ':')[0] AS object_prefix,
        COUNT(*) AS frequency
    ORDER BY subject_category, object_category, frequency DESC;
    """

    combinations = [(c1, c2) for c1 in categories for c2 in categories]

    rows = []
    with click.progressbar(combinations, length=len(combinations)) as bar:
        for category1, category2 in bar:
            with bolt_driver.session() as session:
                records = session.run(query,
                                      category1=category1,
                                      category2=category2)

                for r in records:
                    rows.append({
                        'subject_category': r['subject_category'],
                        'object_category': r['object_category'],
                        'subject_prefix': r['subject_prefix'],
                        'object_prefix': r['object_prefix'],
                        'frequency': r['frequency']
                    })

    # Passing the columns up front fixes their order and keeps an empty
    # result from raising KeyError on column selection.
    df = pd.DataFrame(rows, columns=[
        'subject_category', 'subject_prefix', 'object_category',
        'object_prefix', 'frequency'
    ])

    if output is None:
        with pd.option_context('display.max_rows', None, 'display.max_columns',
                               None):
            click.echo(df)
    else:
        df.to_csv(output, sep='|', header=True)
        click.echo('Saved report to {}'.format(output))
import os
import time
import shp2nx
import undir2dir
from neo4j.v1 import GraphDatabase

# Time how long it takes to request, for every substation, the shortest
# path to the building that shares its netid.
start = time.time()

uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "19891202"))
with driver.session() as session:
    # collect the netid of every substation, in ascending numeric order
    result = session.run("MATCH (s:substation) return s.netid")
    temp_netids = sorted(int(row['s.netid']) for row in result.data())
    for netid in temp_netids:
        print(netid)
        session.run(
            "MATCH p = shortestPath((s:substation)-[*]-(b:building)) "
            "                    where s.netid = %s and b.netid = %s return p"
            % (netid, netid))

end = time.time()

cost = end - start

print("Spent %f seconds" % cost)
import os

from neo4j.v1 import GraphDatabase, basic_auth

# Connection settings come from the environment, falling back to local defaults.
host = os.getenv("NEO4J_HOST", "bolt://localhost")
user = os.getenv("NEO4J_USER", "neo4j")
password = os.getenv("NEO4J_PASSWORD", "neo")
driver = GraphDatabase.driver(
    host,
    auth=basic_auth(user, password),
)


def clear_db():
    """Delete every node in the database, detaching its relationships first."""
    wipe_statement = "MATCH (n) DETACH DELETE n"
    with driver.session() as db:
        db.run(wipe_statement)


clear_db()
Exemple #34
0
# Adding Compound-Protein relationships pulled out of DrugBank to SPOKE
import requests
from bs4 import BeautifulSoup
from neo4j.v1 import GraphDatabase, basic_auth

# Open a Bolt connection and a session for the queries below.
# The port belongs directly after the host ("host:7687"); a "/" between them
# would move ":7687" into the URI path instead of the port position.
# NOTE(review): credentials are hard-coded here -- consider reading them from
# the environment or a config file.
driver = GraphDatabase.driver("bolt://msgap1.ucsf.edu:7687",
                              auth=basic_auth("kbharat96", "tejas320"))
session = driver.session()

## UniProt File Schema:
## ID,Name,Gene Name,GenBank Protein ID,GenBank Gene ID,UniProt ID,Uniprot Title,PDB ID,GeneCard ID,GenAtlas ID,HGNC ID,Species,Drug IDs

file = open('write-db-prot_4.csv', 'r')  # Created using DrugBank Website

# Schema of Document created: 'ProteinID,ProteinType,DrugBankID,DrugName,DrugGroup,Pharm_Action,Drug_Actions'
# All identifiers pulled from DrugBank are in the DB Format


def cat(protid, dbid, dbaction):
    """Build the Cypher statement that links a compound to a protein.

    The statement matches the Compound whose identifier is ``dbid`` and the
    Protein whose identifier is ``protid``, creates an INTERACTS_CiP
    relationship between them and sets its source, action type and license.

    Args:
        protid: Protein identifier (string).
        dbid: Compound identifier (string).
        dbaction: Value stored in ``r.action_type`` (string).

    Returns:
        The Cypher statement as a single string.
    """

    def _quote(value):
        # Escape backslashes first, then single quotes, so that a value
        # containing either cannot terminate the Cypher string literal early.
        return "'" + value.replace("\\", "\\\\").replace("'", "\\'") + "'"

    return ("MATCH (c:Compound), (p:Protein) WHERE c.identifier = "
            + _quote(dbid)
            + " and p.identifier = "
            + _quote(protid)
            + " CREATE (c)-[r:INTERACTS_CiP]->(p) SET r.source = 'DrugBank', r.action_type = "
            + _quote(dbaction)
            + ", r.license = 'CC BY-ND 3.0'")


record_cip = session.run(
    "MATCH (c:Compound)-[r:INTERACTS_CiP]->(p:Protein) return c.identifier as cmpd, c.chembl_id as chembl, p.identifier as prot"
)
record_db = session.run(
    "MATCH (c:Compound) where c.name contains '' and exists (c.drugbank_id) return c.identifier as id"
)

db_exists = []
for id_db in record_db:
Exemple #35
0
 def test_bolt_uri_constructs_direct_driver(self):
     """A driver created from a bolt:// URI must be a DirectDriver."""
     stub_scripts = {9001: "empty.script"}
     with StubCluster(stub_scripts):
         direct = GraphDatabase.driver("bolt://127.0.0.1:9001", auth=self.auth_token, encrypted=False)
         with direct as opened:
             assert isinstance(opened, DirectDriver)
 def test_custom_ca_not_implemented(self):
     """Asking for TRUST_CUSTOM_CA_SIGNED_CERTIFICATES must raise NotImplementedError."""
     with self.assertRaises(NotImplementedError):
         GraphDatabase.driver(
             self.bolt_uri,
             auth=self.auth_token,
             trust=TRUST_CUSTOM_CA_SIGNED_CERTIFICATES,
         )
 def test_should_fail_on_incorrect_password(self):
     """Running a query with a wrong password must raise AuthError."""
     with self.assertRaises(AuthError):
         bad_credentials = ("neo4j", "wrong-password")
         with GraphDatabase.driver(self.bolt_uri, auth=bad_credentials) as driver, \
                 driver.session() as session:
             session.run("RETURN 1")
Exemple #38
0
"""
file: app.py
author: Kritka Sahni, Jitesh Fulwariya, Palash Kumar Koutu, Thomas Binu
Description: This file contains API end-points which can be hit by front-end. These
end-points contains various queries to be executed on Neo4j database.
"""

# import statements here
from flask import Flask, g, Response, request, jsonify, render_template
from neo4j.v1 import GraphDatabase, basic_auth
from json import dumps
import logging

# Flask application; static assets are served under /static/.
app = Flask(__name__, static_url_path='/static/')

# Module-level Neo4j driver for the local Bolt endpoint.
# NOTE(review): credentials are hard-coded -- consider moving them to configuration.
driver = GraphDatabase.driver('bolt://localhost',
                              auth=basic_auth("neo4j", "root"))

# Emit log records at DEBUG level and above.
logging.basicConfig(level=logging.DEBUG)


# Get the index file
@app.route("/")
def get_index():
    """Render the 'index.html' template for the root URL."""
    page = render_template('index.html')
    return page


# Serialize person and company as together in object
def serialize_person_and_company(record,
                                 person_key='person',
                                 company_key='company'):
    person = record[person_key]
 def test_routing_driver_not_compatible_with_tofu(self):
     """Creating a driver for self.bolt_routing_uri with TRUST_ON_FIRST_USE must raise ValueError."""
     with self.assertRaises(ValueError):
         GraphDatabase.driver(
             self.bolt_routing_uri,
             auth=self.auth_token,
             trust=TRUST_ON_FIRST_USE,
         )
Exemple #40
0
from neo4j.v1 import GraphDatabase, basic_auth
import requests
import json

# Open an unencrypted Bolt connection to the local Neo4j instance and start a session.
driver = GraphDatabase.driver("bolt://localhost", auth=basic_auth("",""), encrypted=False)
session = driver.session()

# The GitHub API client id and secret are read from a local JSON file.
with open('credentials.json') as f:
    credentials = json.loads(f.read())

client_id = credentials["client_id"]
client_secret = credentials["client_secret"]

# Base URL of the GitHub contents API for the repository being read.
repository = "neo4j/neo4j"
contents_url = "https://api.github.com/repos/" + repository + "/contents/"

def getDirectoryContent(path):
    """Request `path` below `contents_url` and return the response body as text."""
    return requests.get(contents_url + path).text

def traverse(currentDir):
    """List `currentDir` through the GitHub contents API and collect its file paths.

    The client id and secret are appended to the path as query-string
    parameters. Entries of type "dir" are visited recursively; entries of
    type "file" have their path appended to the local `files` list.
    """
    files = []
    contents = json.loads(getDirectoryContent(currentDir+"?client_id=" + client_id + "&client_secret=" + client_secret))
    for elt in contents:
        if elt["type"] == "dir":
            # NOTE(review): the result of the recursive call is not used, and no
            # return statement is visible here -- confirm how `files` is consumed.
            traverse(elt["path"])
        elif elt["type"] == "file":
          files.append(elt["path"])
Exemple #41
0
import json
import datetime

# PARAMETERS
NEO4J_URL = "bolt://localhost:7687"
NEO4J_USER = "******"
NEO4J_PWD = "firstlife2014"

MONGO_URL = 'mongodb://localhost:27017/'
MONGO_DB = "test"
MONGO_COLLECTION = "areas"
# END PARAMETERS

# Open the Neo4j driver and a session using the NEO4J_* parameters.
uri = NEO4J_URL
driver = GraphDatabase.driver(uri, auth=(NEO4J_USER, NEO4J_PWD))
session = driver.session()

# Select the MongoDB database and collection named by MONGO_DB / MONGO_COLLECTION.
client = MongoClient(MONGO_URL)
db = client[MONGO_DB]
collection = db[MONGO_COLLECTION]

# Log files, opened in "w" mode (existing content is truncated).
fErrors = open("errors.log", "w")
fSkip = open("edgeSkip.log", "w")
fEdge = open("edges.log", "w")


def log(handler, message):
    """Write `message` to `handler`, prefixed with the current UTC time in brackets.

    The line is encoded to ASCII before it is written; characters that cannot
    be encoded are dropped.
    """
    timestamp = str(datetime.datetime.utcnow())
    line = "[" + timestamp + "] " + message
    handler.write(line.encode('ascii', 'ignore'))
Exemple #42
0
"""
Sets per orthogroup statistics on the GO terms in a Neo4j database and reports these in a tsv file on level 1 and 2 of GO.
python3 goGrouping.py interproscan.xml *.fasta
"""
import sys
from collections import defaultdict
from neo4j.v1 import GraphDatabase, basic_auth

# Connection settings for the Neo4j server.
hostname = "wurnfs"
username = "******"
password = "******"

driver = GraphDatabase.driver("bolt://{}".format(hostname),
                              auth=basic_auth(username, password))
session = driver.session()

currentGene = ""
processed = set()

# The orthogroup name is the first command-line argument up to its first ".";
# the whole content of that file is kept as one string.
orthoGroup = sys.argv[1].split(".")[0]
orthoData = ''.join(open(sys.argv[1], "r").readlines())

# Index the per-orthogroup property on GOTerm nodes, then reset both the
# "<orthoGroup>" and "all<orthoGroup>" properties to 0 on every GOTerm.
session.run("create index on :GOTerm({})".format(orthoGroup))
session.run("match (a:GOTerm) set a.{} = 0".format(orthoGroup))
session.run("match (a:GOTerm) set a.all{} = 0".format(orthoGroup))

for f in sys.argv[3:]:
    proteinMapping = {}
    orgName = f.split("/")[-1].split(".")[0]
    print(orgName)
    for l in open(f, "r"):
Exemple #43
0
import sys
from neo4j.v1 import GraphDatabase

from pprint import pprint

# Object Loader
sys.path.insert(0, '../WebScrapper')
from obj_loader import read_skills_file
uri = "bolt://433-06.csse.rose-hulman.edu:7688"
driver = GraphDatabase.driver(uri, auth=("neo4j", "huntallthemonsters247"))
allSkills = read_skills_file()

allSkills[0][0]['Skills'][0]['Description'] = 'None'


def add_skills():
    with driver.session() as session:
        with session.begin_transaction() as tx:
            for attribute in allSkills[0]:
                tx.run(
                    "MERGE (a: Attribute {id: $id, Name: $name})"
                    "RETURN a",
                    id=attribute['id'],
                    name=attribute['Name'])

                for skill in attribute['Skills']:
                    #print(skill)
                    tx.run(
                        "MERGE (s: Skill {Name: $name, Skill_Req: $Skill_Req, Description: $Description})"
                        "RETURN s",
                        name=skill['Name'],
import requests
from neo4j.v1 import GraphDatabase

print("hallo")


def create_event(tx, id, type):
    """Create an Event node carrying `id` and `type` and return it.

    Runs a single CREATE statement on the transaction `tx` and returns the
    first value of the single record that the statement yields.
    """
    statement = (
        "CREATE (e:Event) "
        "SET e.id = $id, "
        " e.type = $type "
        "RETURN e"
    )
    record = tx.run(statement, id=id, type=type).single()
    return record[0]


# Fetch the public GitHub event feed and store one Event node per entry.
r = requests.get('https://api.github.com/events')
# Fail early on an HTTP error (for example rate limiting) instead of iterating
# over an error payload as if it were the list of events.
r.raise_for_status()
json = r.json()

driver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'neo'))

try:
    with driver.session() as session:
        for e in json:
            # Each event is written in its own write transaction.
            session.write_transaction(create_event, e['id'], e['type'])
finally:
    # Close the driver even when one of the writes fails.
    driver.close()
Exemple #45
0
from neo4j.v1 import GraphDatabase
driver = GraphDatabase.driver(
    "bolt://neo4j-core-0.neo4j.neo4j-cluster.svc.cluster.local:7687",
    auth=("neo4j", "password"))


def add_friends(tx, name, friend_name):
    """MERGE a Person named `name` and a KNOWS relationship to a Person named `friend_name`."""
    statement = (
        "MERGE (a:Person {name: $name}) "
        "MERGE (a)-[:KNOWS]->(friend:Person {name: $friend_name})"
    )
    tx.run(statement, name=name, friend_name=friend_name)


def print_friends(tx, name):
    """Print the name of every node that the Person `name` KNOWS, ordered by name."""
    statement = (
        "MATCH (a:Person)-[:KNOWS]->(friend) WHERE a.name = $name "
        "RETURN friend.name ORDER BY friend.name"
    )
    rows = tx.run(statement, name=name)
    for row in rows:
        print(row["friend.name"])


# Add Arthur's three friends, one write transaction each, then print them.
with driver.session() as session:
    for friend in ("Guinevere", "Lancelot", "Merlin"):
        session.write_transaction(add_friends, "Arthur", friend)
    session.read_transaction(print_friends, "Arthur")
 def setup_class(cls):
     """Start a GraphDatabaseServer and open a driver against its Bolt URI."""
     started = GraphDatabaseServer()
     cls.server = started
     started.start()
     cls.driver = GraphDatabase.driver(started.bolt_uri, auth=started.auth_token)
Exemple #47
0
 def test_direct_should_reject_routing_context(self):
     """A bolt:// URI that carries query parameters must raise ValueError."""
     uri_with_context = "bolt://127.0.0.1:9001/?name=molly&age=1"
     with self.assertRaises(ValueError):
         GraphDatabase.driver(uri_with_context, auth=self.auth_token, encrypted=False)
sys.path.append("/Users/be15516/projects/melodi/")

import config

#3 steps
#1. Convert each sql table to a pipe separated format
#	for i in *sql.gz; do echo $i; python ~/scripts/bristol/mysql_to_csv.py <(gunzip -c $i) | gzip > ${i%%.*}.psv.gz; done
#2. Get rid of single quotes in citations
#	gunzip -c semmedVER30_R_CITATIONS_to12312016.csv.gz | sed "s/'//g" | gzip > semmedVER30_R_CITATIONS_to12312016_edit.csv.gz
#3. Add new data - change file locations in script and run this script
#	python browser/management/add_new_semmed.py

#neo4j
from neo4j.v1 import GraphDatabase,basic_auth
auth_token = basic_auth(config.user, config.password)
driver = GraphDatabase.driver("bolt://"+config.server+":"+config.port,auth=auth_token)

#files
baseDir='/Users/be15516/mounts/rdfs_be15516/data/SemMedDB/v30_R_31-12-16/'
#SemMed
semCitation = baseDir+'semmedVER30_R_CITATIONS_to12312016_edit.csv.gz'
semPA = baseDir+'semmedVER30_R_PREDICATION_to12312016.csv.gz'

old_pmids='data/old_pmids.txt.gz'
new_pmids='data/new_pmids.txt.gz'

#getData metrics
#memory: 2973Mb
#Time taken: 16 minutes
def getData():
	print "Getting PubMed data from MELODI graph..."
from neo4j.v1 import GraphDatabase
import math

driver = GraphDatabase.driver("bolt://localhost", auth=("neo4j", "neo"))

# Base style attributes shared by all nodes and by all relationships.
base_css = {}
base_css["node"] = {
    "diameter": "50px",
    "color": "#A5ABB6",
    "border-color": "#9AA1AC",
    "border-width": "2px",
    "text-color-internal": "#FFFFFF",
    "font-size": "10px"
}

base_css["relationship"] = {
    "color": "#A5ABB6",
    "text-color-external": "#000000",
    # Six hex digits, matching the node style; a five-digit value is not a
    # valid colour.
    "text-color-internal": "#FFFFFF",
    "shaft-width": "1px",
    "font-size": "8px",
    "padding": "3px",
    "caption": "\"{type}\"",
}

# node.Set_6 {
#   defaultCaption: "<id>";
#   color: #68BDF6;
#   border-color: #5CA8DB;
#   text-color-internal: #FFFFFF;
#   caption: "{id}";
Exemple #50
0
 def __init__(self,uri,username,password,session_id_input):
     """Keep the connection settings and session id, and create a driver for `uri`."""
     self._uri, self._username, self._password = uri, username, password
     credentials = (self._username, self._password)
     self._driver = GraphDatabase.driver(self._uri, auth=credentials)
     self._session_id = session_id_input
Exemple #51
0
 def server_version_info(cls):
     """Return the server version, parsed from the summary of a trivial query."""
     with GraphDatabase.driver(cls.bolt_uri, auth=cls.auth_token) as driver:
         with driver.session() as session:
             summary = session.run("RETURN 1").summary()
             version_text = summary.server.version
             return ServerVersion.from_str(version_text)
# import dateparser
import requests
from joblib import Parallel, delayed
import multiprocessing
import collections
import warnings
import time
import probablepeople as pp

# os.chdir('/')


####################################
# neo4j connection
####################################
driver = GraphDatabase.driver("bolt://ec2-13-228-37-181.ap-southeast-1.compute.amazonaws.com:7687",
                              auth=basic_auth("neo4j", "chenziao"))
session = driver.session()

####################################
# DF connection
####################################
url = 'https://dds-test.thomsonreuters.com/datafusion/'
user = '******'
pwd = 'jiaming'


def get_token_headers():
    """Request an OAuth token with the module-level credentials and start building headers.

    NOTE(review): no return statement is visible for this function and the
    token response `r` is not used in the lines shown -- confirm the rest of
    the body.
    """
    headers = {'Content-Type': 'application/json'}
    data = {'username': user, 'password': pwd}
    # NOTE(review): verify=False turns off TLS certificate verification for this request.
    r = requests.post(url + 'oauth/token', headers=headers, json=data, verify=False).json()
    headers['Accept'] = 'application/json'
Exemple #53
0
 def setUp(self):
     """Create self.driver from self.bolt_routing_uri and self.auth_token."""
     from neo4j.v1 import GraphDatabase
     routing_uri = self.bolt_routing_uri
     self.driver = GraphDatabase.driver(routing_uri, auth=self.auth_token)
Exemple #54
0
                        help='Neo4j host, default \'bolt://localhost\'')
    parser.add_argument('--neo4j_user', default='neo4j',
                        help='Neo4j username, default \'neo4j\'')
    parser.add_argument('--neo4j_pw', help='Neo4j password')
    parser.add_argument('--infile', default='artists.txt',
                        help='List of artist wikipedia pages')
    parser.add_argument('--outfile', default='graph.npz',
                        help='File to output graph distance matrix to')

    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    n4j = GraphDatabase.driver(
        args.neo4j_host, auth=(args.neo4j_user, args.neo4j_pw))

    with open(args.infile, 'r') as fh:
        artists = fh.read().strip().split('\n')
        artist_indices = {artist: i for i, artist in enumerate(artists)}

    # adj_matrix = csr_matrix((len(artists), len(artists)))
    # adj_matrix = np.zeros((len(artists), len(artists)))
    rows, cols, vals = [], [], []

    print('Querying edges')
    with open(args.outfile, 'a') as fh:
        with n4j.session() as session:
            edges = session.run(GRAPH_QUERY)
            print('Building adjacency matrix')
            for edge in edges:
Exemple #55
0
def main(argv):
    """Parse a VCF file into temporary CSV files and load them into Neo4j.

    Positional arguments in ``argv``:
        0: path of the VCF file to process
        1: folder for the temporary CSV files (used as a plain string prefix)
        2: username
        3: experiment name
        4: species

    Node and relationship rows are written to per-run CSV files whose names
    start with ``temp_folder + temp_token``. Every 15000 records, and once
    more at the end, the files are closed and handed to ``populateDB``; the
    temporary files are removed when the run finishes. Per-file statistics
    are stored as JSON on the File node.

    This function uses Python 2 syntax (print statements, ``dict.has_key``).
    """

    # Read the positional arguments, starting with the file to process
    input_file = argv[0]
    temp_folder = argv[1]
    username = argv[2]
    experiment = argv[3]
    species = argv[4]

    config = json.load(open('../configuration.json'))

    # Unique prefix for this run's temporary files
    temp_token = username + '_' + str(uuid.uuid4())

    # Create the CSV files that store node information
    variant_csv = open(temp_folder + temp_token + '_variant.csv', 'w')
    info_csv = open(temp_folder + temp_token + '_info.csv', 'w')
    genotype_csv = open(temp_folder + temp_token + '_genotype.csv', 'w')
    gene_csv = open(temp_folder + temp_token + '_gene.csv', 'w')
    chromosome_csv = open(temp_folder + temp_token + '_chromosome.csv', 'w')

    # Create the CSV files that store relationship information
    of_species_csv = open(temp_folder + temp_token + '_of_species.csv', 'w')
    contains_csv = open(temp_folder + temp_token + '_contains.csv', 'w')
    supported_by_csv = open(temp_folder + temp_token + '_supported_by.csv',
                            'w')
    for_variant_csv = open(temp_folder + temp_token + '_for_variant.csv', 'w')
    in_variant_csv = open(temp_folder + temp_token + '_in_variant.csv', 'w')
    has_variant_csv = open(temp_folder + temp_token + '_has_variant.csv', 'w')
    in_chromosome_csv = open(temp_folder + temp_token + '_in_chromosome.csv',
                             'w')

    # Initialise the writers for all the files
    # (the info and supported_by files are tab-delimited, the rest comma-delimited)

    # ---- nodes
    variantWriter = csv.writer(variant_csv, delimiter=',')
    infoWriter = csv.writer(info_csv, delimiter='\t')
    genotypeWriter = csv.writer(genotype_csv, delimiter=',')
    geneWriter = csv.writer(gene_csv, delimiter=',')
    chromosomeWriter = csv.writer(chromosome_csv, delimiter=',')

    # ---- relationships
    ofSpeciesWriter = csv.writer(of_species_csv, delimiter=',')
    containsWriter = csv.writer(contains_csv, delimiter=',')
    supportedByWriter = csv.writer(supported_by_csv, delimiter='\t')
    forVariantWriter = csv.writer(for_variant_csv, delimiter=',')
    inVariantWriter = csv.writer(in_variant_csv, delimiter=',')
    hasVariantWriter = csv.writer(has_variant_csv, delimiter=',')
    inChromosomeWriter = csv.writer(in_chromosome_csv, delimiter=',')

    # Open the VCF file
    print 'Opening .vcf file...'
    file = open(input_file, 'r')
    reader = vcf.Reader(file, encoding='utf-8')

    # Build the file headers

    # ---- nodes
    variant_header = ["variant_id", "CHROM", "POS", "REF", "ALT", "MUTATION"]
    genotype_header = ["sample"]
    info_header = [
        "info_id", "END", "ID", "QUAL", "FILTER", "FORMAT", "HETEROZIGOSITY",
        "dbSNP", "DP", "Gene_refGene", "Func_refGene", "QD", "SIFT_score",
        "otg_all", "NM", "LM", "FS", "MQ0", "attributes"
    ]
    gene_header = ["gene_id"]
    chromosome_header = ["chromosome"]

    # ---- relationships
    contains_header = ["name", "info_id"]
    for_variant_header = ["info_id", "variant_id"]
    of_species_header = ["sample", "species"]
    in_variant_header = ["gene_id", "variant_id"]
    has_variant_header = ["chromosome", "variant_id"]
    in_chromosome_header = ["gene_id", "chromosome"]
    supported_by_header = [
        "info_id", "sample", "phased", "state", "attributes"
    ]

    # Initialise the data structures needed for parsing (to optimise loading the data into the database)

    # ---- nodes
    genotypes = set()
    genes = set()
    chromosomes = set()

    # Write the headers to their respective files

    # ---- nodes
    variantWriter.writerow(variant_header)
    genotypeWriter.writerow(genotype_header)
    infoWriter.writerow(info_header)
    geneWriter.writerow(gene_header)
    chromosomeWriter.writerow(chromosome_header)

    # ---- relationships
    supportedByWriter.writerow(supported_by_header)
    containsWriter.writerow(contains_header)
    ofSpeciesWriter.writerow(of_species_header)
    forVariantWriter.writerow(for_variant_header)
    inVariantWriter.writerow(in_variant_header)
    hasVariantWriter.writerow(has_variant_header)
    inChromosomeWriter.writerow(in_chromosome_header)

    print 'Starting parsing procedure for file ' + input_file

    # Connect to Neo4j
    driver = GraphDatabase.driver("bolt://" + config["neo4j"]["address"],
                                  auth=basic_auth(config["neo4j"]["username"],
                                                  config["neo4j"]["password"]))

    # Create the indexes used by the load
    session = driver.session()
    statements = [
        "CREATE INDEX ON :File(name);", "CREATE INDEX ON :Species(species);",
        "CREATE INDEX ON :Variant(variant_id);",
        "CREATE INDEX ON :Info(info_id);",
        "CREATE INDEX ON :Genotype(sample);",
        "CREATE INDEX ON :Gene(gene_id);",
        "CREATE INDEX ON :Chromosome(chromosome);"
    ]
    for statement in statements:
        session.run(statement)
    session.close()

    # Properties of the node that represents the file
    properties = {
        "name": os.path.basename(file.name),
        "extension": os.path.splitext(input_file)[1]
    }

    # Counters accumulated while parsing; stored on the File node at the end
    statistics = {
        "total": 0,
        "hom": 0,
        "het": 0,
        "hom_alt": 0,
        "uncalled": 0,
        "snp": 0,
        "indels": 0,
        "unknown": 0,
        "in_dbSNP": 0,
        "not_in_dbSNP": 0,
        "in_1000g": 0,
        "not_in_1000g": 0
    }

    # ---- start by creating the first reference nodes
    session = driver.session()

    # The fragments below are joined into a single Cypher statement
    prova = [
        "MERGE (u:User { username:{username} })",
        "MERGE (e:Experiment { name:{experiment} })",
        "MERGE (s:Species {species: {species} })",
        "CREATE (f:File { name:{properties}.name }) SET f.extension = {properties}.extension",
        "MERGE (u)-[:Created]->(e)", "MERGE (e)-[:For_Species]->(s)",
        "MERGE (e)-[:Composed_By]->(f)"
    ]

    # Associate the file with the user
    session.run(
        " ".join(prova), {
            "username": username,
            "experiment": experiment,
            "species": species,
            "properties": properties
        })

    session.close()

    # Initialise a counter used to load the file into the database in partial batches when it is too large
    row_count = 0

    for record in reader:

        row_count += 1

        # Build the node corresponding to the variant
        # NOTE(review): in "variant_id" the conditional expression covers the
        # whole concatenation, so when record.ALT is not a list the id is just
        # record.ALT -- confirm this is intended.
        variant = {
            "variant_id":
            record.CHROM + ':' + str(record.POS) + ':' + record.REF + ':' +
            ";".join(str(v)
                     for v in record.ALT) if isinstance(record.ALT, list) else
            record.ALT,  # id used to index the variants
            "CHROM":
            record.CHROM,
            "POS":
            record.POS,
            "REF":
            record.REF,
            "ALT":
            ";".join(str(v) for v in record.ALT) if isinstance(
                record.ALT, list) else record.ALT,
            "MUTATION":
            record.var_type,
        }

        # Update the file statistics
        statistics["total"] += 1

        if variant["MUTATION"] == 'snp':
            statistics["snp"] += 1
        elif variant["MUTATION"] == 'indel':
            statistics["indels"] += 1
        else:
            statistics["unknown"] += 1

        # Build the annotation attributes (necessary because there is no unambiguous way to know in advance which fields are present)
        annotation = {
            "info_id": uuid.uuid4(),
            "END": record.end,
            "ID": record.ID or '.',
            "QUAL": record.QUAL,
            "FILTER": record.FILTER or 'PASS',
            "FORMAT": record.FORMAT or '.',
            "HETEROZIGOSITY": record.heterozygosity,
            "dbSNP": "",
            "DP": None,
            "Gene_refGene": None,
            "Func_refGene": None,
            "otg_all": None,
            "QD": None,
            "NM": None,
            "LM": None,
            "FS": None,
            "MQ0": None,
            "SIFT_score": None,
            "attributes": {}
        }

        for (key, value) in record.INFO.items():

            # INFO keys containing "snp" fill both the dbSNP and ID fields
            if re.match('(\w*)snp(\w*)', key):

                annotation["dbSNP"] = ";".join(
                    str(v)
                    for v in value) if isinstance(value, list) else value
                annotation["ID"] = ";".join(
                    str(v)
                    for v in value) if isinstance(value, list) else value

                if annotation["dbSNP"] == 'None':
                    annotation["dbSNP"] = None
                    annotation["ID"] = None
                    statistics['not_in_dbSNP'] += 1
                else:
                    statistics['in_dbSNP'] += 1

            # INFO keys of the form "1000g..._all" fill otg_all when they carry a value
            if re.match('1000g(\w*)_all', key):

                if value:
                    annotation['otg_all'] = value
                    statistics['in_1000g'] += 1
                    continue
                else:
                    statistics['not_in_1000g'] += 1

            #annotation["attributes"] += key + "=" + ( ";".join(str(v) for v in value) if isinstance(value,list) else str(value) ) + ","
            #annotation["attributes"][key] = value[0] if isinstance(value,list) and len(value) == 1 else value
            # Keys listed in info_header (with "." replaced by "_") become
            # columns; everything else goes into the free-form attributes
            if key.replace('.', '_') in info_header:

                if key == "LM":
                    annotation[key] = inferType(key, value[0].split('_'))
                    continue
                else:
                    annotation[key.replace('.', '_')] = inferType(key, value)
                    continue
            else:
                annotation["attributes"][key] = inferType(key, value)
            #if isinstance(value, list):
            #    for v in value:
            #        inferType(key, v)
            #else:
            #    inferType(key, value)

        # remove the trailing comma at the end of the attribute string
        #annotation["attributes"] = annotation["attributes"].rstrip(',')

        # convert the resulting dictionary to JSON (needed for neomodel)
        annotation["attributes"] = json.dumps(annotation["attributes"])

        info_row = []

        for item in info_header:
            info_row.append(
                annotation[item] if annotation.has_key(item) else "")

        infoWriter.writerow(info_row)
        containsWriter.writerow([properties["name"], annotation["info_id"]])

        # Get the names of the sample attributes
        format_vars = record.FORMAT.split(':')

        for sample in record.samples:

            genotypes.add(sample.sample)
            #attributes = '{ sample: "' + sample.sample + '", phased: ' + str(sample.phased) + ', state: ' + str(sample.gt_type) + ', '
            genotype = {
                "sample": sample.sample,
                "phased": sample.phased,
                #"state" : sample.gt_type or 'None',
                "attributes": {}
            }

            # NOTE(review): `not sample.gt_type` is also true when gt_type is
            # 0, so the `== 0` branch below cannot be reached -- confirm.
            # NOTE(review): if `vcf` is PyVCF, gt_type is documented as
            # 0=hom_ref, 1=het, 2=hom_alt -- verify the mapping of 1 and 2 here.
            if not sample.gt_type:
                statistics["uncalled"] += 1
                genotype["state"] = "uncalled"
            elif sample.gt_type == 0:
                statistics["hom"] += 1
                genotype["state"] = "hom_ref"
            elif sample.gt_type == 1:
                statistics["hom_alt"] += 1
                genotype["state"] = "hom_alt"
            elif sample.gt_type == 2:
                statistics["het"] += 1
                genotype["state"] = "het"

            for i in range(len(format_vars)):
                #attributes = attributes + format_vars[i] + ': {' + format_vars[i] + '}, '
                #genotype["attributes"] += format_vars[i] + "=" + (";".join(str(v) for v in sample.data[i]) if isinstance(sample.data[i],list) else str(sample.data[i]) ) + ","
                #genotype["attributes"][format_vars[i]] = sample.data[i][0] if isinstance(sample.data[i],list) and len(sample.data[i]) == 1 else sample.data[i]
                genotype["attributes"][format_vars[i]] = inferType(
                    format_vars[i], sample.data[i])

            #genotype["attributes"] = genotype["attributes"].rstrip(',')

            genotype["attributes"] = json.dumps(genotype["attributes"])

            supported_by_row = []

            for item in supported_by_header:
                if item == "info_id":
                    supported_by_row.append(annotation["info_id"])
                else:
                    supported_by_row.append(
                        genotype[item] if genotype.has_key(item) else "")

            supportedByWriter.writerow(supported_by_row)

        variant_row = []

        for item in variant_header:
            variant_row.append(variant[item])

        variantWriter.writerow(variant_row)

        # The row starts with the info id; the loop then appends the header
        # fields that exist on the variant
        for_variant_row = [annotation["info_id"]]

        for item in for_variant_header:
            if variant.has_key(item):
                for_variant_row.append(variant[item])

        forVariantWriter.writerow(for_variant_row)

        # Add chromosomes and genes (and their relationships)
        chromosomes.add(record.CHROM)
        hasVariantWriter.writerow([record.CHROM, variant["variant_id"]])

        if record.INFO.has_key('Gene.refGene'):
            for g in record.INFO['Gene.refGene']:
                if not (g == 'NONE'):
                    genes.add(g)
                    inChromosomeWriter.writerow([g, record.CHROM])
                    inVariantWriter.writerow([g, variant["variant_id"]])

        # Progress indicator, rewritten in place on the same console line
        sys.stdout.write("%d lines scanned %s" % (row_count, "\r"))
        sys.stdout.flush()

        # Every 15000 records: flush the collected sets, close the files,
        # load them into the database and start a fresh set of files
        if not (row_count % 15000):

            print ""

            for item in list(genes):
                geneWriter.writerow([item])

            for item in list(chromosomes):
                chromosomeWriter.writerow([item])

            for item in list(genotypes):
                genotypeWriter.writerow([item])
                ofSpeciesWriter.writerow([item, species])

            variant_csv.close()
            info_csv.close()
            genotype_csv.close()
            gene_csv.close()
            chromosome_csv.close()

            of_species_csv.close()
            contains_csv.close()
            supported_by_csv.close()
            for_variant_csv.close()
            in_variant_csv.close()
            has_variant_csv.close()
            in_chromosome_csv.close()

            populateDB(driver, temp_folder + temp_token)

            # Create the CSV files that store node information
            variant_csv = open(temp_folder + temp_token + '_variant.csv', 'w')
            info_csv = open(temp_folder + temp_token + '_info.csv', 'w')
            genotype_csv = open(temp_folder + temp_token + '_genotype.csv',
                                'w')
            gene_csv = open(temp_folder + temp_token + '_gene.csv', 'w')
            chromosome_csv = open(temp_folder + temp_token + '_chromosome.csv',
                                  'w')

            # Create the CSV files that store relationship information
            of_species_csv = open(temp_folder + temp_token + '_of_species.csv',
                                  'w')
            contains_csv = open(temp_folder + temp_token + '_contains.csv',
                                'w')
            supported_by_csv = open(
                temp_folder + temp_token + '_supported_by.csv', 'w')
            for_variant_csv = open(
                temp_folder + temp_token + '_for_variant.csv', 'w')
            in_variant_csv = open(temp_folder + temp_token + '_in_variant.csv',
                                  'w')
            has_variant_csv = open(
                temp_folder + temp_token + '_has_variant.csv', 'w')
            in_chromosome_csv = open(
                temp_folder + temp_token + '_in_chromosome.csv', 'w')

            # Initialise the writers for all the files

            # ---- nodes
            variantWriter = csv.writer(variant_csv, delimiter=',')
            infoWriter = csv.writer(info_csv, delimiter='\t')
            genotypeWriter = csv.writer(genotype_csv, delimiter=',')
            geneWriter = csv.writer(gene_csv, delimiter=',')
            chromosomeWriter = csv.writer(chromosome_csv, delimiter=',')

            # ---- relationships
            ofSpeciesWriter = csv.writer(of_species_csv, delimiter=',')
            containsWriter = csv.writer(contains_csv, delimiter=',')
            supportedByWriter = csv.writer(supported_by_csv, delimiter='\t')
            forVariantWriter = csv.writer(for_variant_csv, delimiter=',')
            inVariantWriter = csv.writer(in_variant_csv, delimiter=',')
            hasVariantWriter = csv.writer(has_variant_csv, delimiter=',')
            inChromosomeWriter = csv.writer(in_chromosome_csv, delimiter=',')

            # Write the headers to their respective files

            # ---- nodes
            variantWriter.writerow(variant_header)
            genotypeWriter.writerow(genotype_header)
            infoWriter.writerow(info_header)
            geneWriter.writerow(gene_header)
            chromosomeWriter.writerow(chromosome_header)

            # ---- relationships
            supportedByWriter.writerow(supported_by_header)
            containsWriter.writerow(contains_header)
            ofSpeciesWriter.writerow(of_species_header)
            forVariantWriter.writerow(for_variant_header)
            inVariantWriter.writerow(in_variant_header)
            hasVariantWriter.writerow(has_variant_header)
            inChromosomeWriter.writerow(in_chromosome_header)

            # session = driver.session()
            # session.run(" ".join([
            #         "USING PERIODIC COMMIT 15000",
            #         "LOAD CSV WITH HEADERS from 'File:///" + temp_folder +  temp_token + "_supported_by.csv' as line",
            #         "MERGE (i:Info {info_id: line.info_id}) WITH line, i",
            #         "MERGE (g:Genotype {sample: line.sample}) WITH line, i, g",
            #         "CREATE (i)-[s:Supported_By]->(g) SET s += line"
            #     ]))
            # session.close()

            # supported_by_csv = open(temp_folder + temp_token + '_supported_by.csv', 'w')
            # supportedByWriter = csv.writer(supported_by_csv, delimiter=',') #Reopen the writer
            # supportedByWriter.writerow(supported_by_header)

    print ""

    # Flush the collected genes, chromosomes and genotypes for the final batch
    for item in list(genes):
        geneWriter.writerow([item])

    for item in list(chromosomes):
        chromosomeWriter.writerow([item])

    for item in list(genotypes):
        genotypeWriter.writerow([item])
        ofSpeciesWriter.writerow([item, species])

    file.close()

    # Finish writing the files (otherwise the data cannot be loaded into the database)
    variant_csv.close()
    info_csv.close()
    genotype_csv.close()
    gene_csv.close()
    chromosome_csv.close()

    of_species_csv.close()
    contains_csv.close()
    supported_by_csv.close()
    for_variant_csv.close()
    in_variant_csv.close()
    has_variant_csv.close()
    in_chromosome_csv.close()

    session = driver.session()

    prova = [
        "MATCH (u:User { username: {username} })-[:Created]->(e:Experiment { name:{experiment} })-[:Composed_By]->(f:File { name:{properties}.name })",
        "SET f.statistics =  {statistics}"
    ]

    # Store the collected statistics (as a JSON string) on the File node
    session.run(
        " ".join(prova), {
            "username": username,
            "experiment": experiment,
            "species": species,
            "properties": properties,
            "statistics": json.dumps(statistics)
        })

    session.close()

    # Load the final batch
    populateDB(driver, temp_folder + temp_token)

    # Remove the temporary CSV files
    # os.remove(input_file)
    os.remove(temp_folder + temp_token + '_variant.csv')
    os.remove(temp_folder + temp_token + '_info.csv')
    os.remove(temp_folder + temp_token + '_genotype.csv')
    os.remove(temp_folder + temp_token + '_gene.csv')
    os.remove(temp_folder + temp_token + '_chromosome.csv')
    os.remove(temp_folder + temp_token + '_of_species.csv')
    os.remove(temp_folder + temp_token + '_contains.csv')
    os.remove(temp_folder + temp_token + "_supported_by.csv")
    #for part in range(part_count + 1):
    #    os.remove(temp_folder +  temp_token + "_supported_by_" +  str(part) + ".csv")

    os.remove(temp_folder + temp_token + '_for_variant.csv')
    os.remove(temp_folder + temp_token + '_in_variant.csv')
    os.remove(temp_folder + temp_token + '_has_variant.csv')
    os.remove(temp_folder + temp_token + '_in_chromosome.csv')

    print 'Done.'
Exemple #56
0
from neo4j.v1 import GraphDatabase, basic_auth
import json
import sys
from time import gmtime, strftime
import logging
import ntpath

# Establish database session
# NOTE(review): the Bolt URI and the credentials are hard-coded literals;
# consider reading them from configuration or the environment.
driver = GraphDatabase.driver("bolt://localhost:7687",
                              auth=basic_auth("neo4j", "password"))
# A single module-level session is opened at import time.
session = driver.session()

# Dedicated file logger for this script. The level is INFO, so
# logger.debug(...) calls are filtered out unless the level is lowered.
logger = logging.getLogger("sysmon2neo4j")
logger.setLevel(logging.INFO)
handler = logging.FileHandler('/var/log/logstash/sysmon2neo4j.log')
# Log line layout: [LEVEL] - logger name - timestamp - message
formatter = logging.Formatter(
    '[%(levelname)s] - %(name)s - %(asctime)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)


def handle_event(data):
    """Start building a Cypher statement for one event record.

    ``data`` is indexed with the keys ``'event_data'`` and ``'event_id'``.
    For ``event_id == 1`` the visible code assembles a ``MERGE`` on a
    ``Process`` node keyed by ``ProcessGuid`` followed by an
    ``ON CREATE SET p.UtcTime = ...`` clause.

    NOTE(review): the statement is assembled with ``str.format`` from the
    event values rather than with query parameters, so a double quote in
    ``ProcessGuid`` or ``UtcTime`` would break (or alter) the query.
    NOTE(review): only the start of the function is visible here; the
    query is built but not executed in this portion.
    """
    event_data = data['event_data']
    if (data['event_id'] == 1):
        # Event ID 1 - Process created
        # The module logger is set to INFO, so this debug line is normally
        # suppressed.
        logger.debug("Processing event_id = 1")
        # Merge process details
        # Doubled braces are literal "{" / "}" in str.format; the single
        # "{}" receives the process GUID.
        query = "MERGE (p:Process {{ProcessGuid: \"{}\"}})\n".format(
            event_data['ProcessGuid'])
        query += "ON CREATE SET "
        query += "p.UtcTime = \"{}\"".format(event_data['UtcTime'])
 def __init__(self, uri, ticket):
     """Create the driver for ``uri``, authenticating via ``kerberos_auth(ticket)``."""
     credentials = kerberos_auth(ticket)
     self._driver = GraphDatabase.driver(uri, auth=credentials)
Exemple #58
0
 def __init__(self, uri, user, password):
     """Create the driver for ``uri``, authenticating with a ``(user, password)`` pair."""
     credentials = (user, password)
     self._driver = GraphDatabase.driver(uri, auth=credentials)
Exemple #59
0
def get_cop(session, drug, disease):
    """Fetch the nodes and relationships on paths linking a drug to a disease.

    Runs a Cypher query that matches every path of at most six
    relationships (in either direction) between the ``Drug`` node named
    ``drug`` and the ``Disease`` node named ``disease``, then collects the
    distinct nodes and relationships found on those paths.

    The names are sent as query parameters instead of being interpolated
    into the query text, so quotes or other special characters in either
    name can no longer break or alter the statement.

    Args:
        session: Object exposing ``run(query, parameters)`` (an open
            database session).
        drug: Value matched against ``Drug.name``.
        disease: Value matched against ``Disease.name``; apostrophes are
            removed before matching.

    Returns:
        Whatever ``session.run`` returns; each record carries the columns
        ``nodes`` and ``edges``.
    """
    query = (
        "MATCH path = (dr:Drug {name: $drug})-[*..6]-(di:Disease {name: $disease}) "
        "UNWIND nodes(path) as n "
        "UNWIND relationships(path) as r "
        "RETURN collect(distinct n) as nodes, collect(distinct r) as edges"
    )
    parameters = {
        "drug": drug,
        # Apostrophes were always stripped from the disease name before
        # matching; keep doing so to leave lookups unchanged.
        "disease": disease.replace("'", ""),
    }
    return session.run(query, parameters)


# Connect to Neo4j with the host and credentials from the project config.
config = Config().config
neo4j_settings = config['neo4j']
driver = GraphDatabase.driver(
    neo4j_settings['host'],
    auth=(neo4j_settings['user'], neo4j_settings['password']),
)

# CSV of drug / condition pairs to look up in the graph.
cop_file = './data/cop_benchmark.csv'
cop = pd.read_csv(cop_file)

with driver.session() as session:
    for index, row in cop.iterrows():
        drug_label = row['Drug']
        condition_label = row['ConditionName']
        # Both names are lower-cased and then capitalised before querying.
        result = get_cop(session,
                         drug_label.lower().capitalize(),
                         condition_label.lower().capitalize())
        for record in result:
            print(drug_label, condition_label, record)

    # For a one-off manual check, a single pair can be queried directly,
    # e.g. get_cop(session, 'Naproxen', 'Osteoarthritis').
Exemple #60
0
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import json

from neo4j.v1 import GraphDatabase

# Connect to Neo4j over Bolt and open a module-level session.
# NOTE(review): the host address and credentials are hard-coded literals;
# consider moving them to configuration or the environment.
driver = GraphDatabase.driver("bolt://10.75.44.192:7687",
                              auth=("neo4j", "cisco123"))
session = driver.session()
# Input message file, opened at import time without a context manager.
# NOTE(review): no matching close() is visible in this part of the file —
# confirm it is closed once reading is done.
f = open('1517386971.52.msg')


def get_path(originator_id, cluster_list):
    """Build the list of links from an originator through a cluster list.

    The cluster list is walked in reverse order: the first link joins
    ``originator_id`` to the last entry of ``cluster_list``, and each
    following link joins consecutive entries of the reversed list.

    The input list is left untouched; the reversal is done on a copy, so
    calling this function repeatedly with the same list gives the same
    result each time.

    Args:
        originator_id: Identifier placed at the start of the path.
        cluster_list: Non-empty sequence of cluster identifiers.

    Returns:
        A list of ``(source, target)`` tuples, one per link.

    Raises:
        IndexError: If ``cluster_list`` is empty.
    """
    hops = list(reversed(cluster_list))
    path_list = [(originator_id, hops[0])]
    # Pair each hop with its successor to form the remaining links.
    path_list.extend(zip(hops, hops[1:]))
    return path_list