Example #1
0
def create_neo4j_container(container_name, volume_home=VOLUME_HOME):
    """
    launches a detached neo4j:4.2.3 docker container through `docker run`
    @param {string} container_name: value given to the --name option
    @param {string} volume_home: host folder holding the neo4j/data, neo4j/logs and
        neo4j/plugins volumes; it is created first when it does not exist yet
    @description publishes ports 7474 and 7687, enables the apoc plugin, and mounts
        constants.OUTPUT_NODES_RELS_PATH as the import directory of neo4j
    @return {void} None
    """

    volume_home_missing = not os.path.exists(volume_home)
    if volume_home_missing:
        os.makedirs(volume_home)

    # Open questions left by the original author:
    #  - should `-v {1}/neo4j/conf:/conf` be added to also mount the neo4j conf files?
    #  - should `-e NEO4J_dbms_security_procedures_whitelist=apoc.coll.\\\*,apoc.load.\\\*` be added?
    # See: https://neo4j.com/labs/apoc/4.2/installation/#restricted
    # and https://github.com/neo4j-contrib/neo4j-apoc-procedures/issues/451
    docker_run_template = """docker run \
    --name {0} \
    -p7474:7474 -p7687:7687 \
    -d \
    -v {1}/neo4j/data:/data \
    -v {1}/neo4j/logs:/logs \
    -v {4}:/var/lib/neo4j/import \
    -v {1}/neo4j/plugins:/plugins \
    -e NEO4J_apoc_export_file_enabled=true \
    -e NEO4J_apoc_import_file_enabled=true \
    -e NEO4J_apoc_import_file_use__neo4j__config=true \
    -e NEO4JLABS_PLUGINS='["apoc"]' \
    -e NEO4J_dbms_security_procedures_unrestricted=apoc.\\\* \
    -e PYTHONUNBUFFERED=1 \
    --env NEO4J_AUTH={2}/{3} \
    --user="******" \
    neo4j:4.2.3
	"""

    # placeholder {4}: the analyzer outputs folder becomes the import directory of neo4j
    template_values = (
        container_name,
        volume_home,
        constants.NEO4J_USER,
        constants.NEO4J_PASS,
        constants.OUTPUT_NODES_RELS_PATH,
    )
    docker_run_cmd = docker_run_template.format(*template_values)

    utilityModule.run_os_command(docker_run_cmd, print_stdout=False)
    logger.info('Docker container %s is starting.' % str(container_name))
Example #2
0
def main():
    """
    command-line entry point: builds the property graph of a JavaScript program
    @description
        1. parses the command-line options (all flags are the strings 'true' / 'false')
        2. checks that the given path lies inside the outputs folder
        3. runs the analyzer driver with node (15 minute timeout)
        4. in hybrid mode, adds the recorded requests, events, cookies and DOM snapshot
        5. unless --import is disabled, imports the result with API_neo4j_prepare
    @return {void} None; exits with status 1 when the path is not under the outputs folder
    """

    parser = argparse.ArgumentParser(description='This script builds a property graph from a given JavaScript program.')
    parser.add_argument('path', metavar='P', help='base path to the folder containing the program files for analysis (must be under the outputs folder).')
    parser.add_argument('--js', help='name of the JavaScript program for analysis (default: js_program.js)', default='js_program.js')
    parser.add_argument('--import', help='whether the constructed property graph should be imported to an active neo4j database (default: true)', default='true')
    parser.add_argument('--hybrid', help='whether the hybrid mode is enabled (default: false)', default='false')
    parser.add_argument('--reqs', help='for hybrid mode only, name of the file containing the sequence of obsevered network requests, pass the string false to exclude (default: request_logs_short.out)', default='request_logs_short.out')
    parser.add_argument('--evts', help='for hybrid mode only, name of the file containing the sequence of fired events, pass the string false to exclude (default: events.out)', default='events.out')
    parser.add_argument('--cookies', help='for hybrid mode only, name of the file containing the cookies, pass the string false to exclude (default: cookies.pkl)', default='cookies.pkl')
    parser.add_argument('--html', help='for hybrid mode only, name of the file containing the DOM tree snapshot, pass the string false to exclude (default: html_rendered.html)', default='html_rendered.html')

    # dictionary of the provided arguments
    args = vars(parser.parse_args())

    base_path = args['path']

    # The output folder handed to the analyzer is the part of the path that follows
    # '/outputs/'. find() is used instead of index() so that a path without that
    # segment (e.g. the outputs folder itself) is reported like any other invalid
    # path instead of crashing with an unhandled ValueError.
    outputs_marker = '/outputs/'
    marker_position = base_path.find(outputs_marker)
    if constantsModule.OUTPUT_NODES_RELS_PATH not in base_path or marker_position == -1:
        logger.error('Path of the program under analysis must be within the outputs folder.')
        sys.exit(1)

    js_program = os.path.join(base_path, args['js'])
    relative_output_path = base_path[marker_position + len(outputs_marker):]

    # build the property graph for the js program
    command = "node --max-old-space-size=32000 %s -js %s -o %s" % (constantsModule.ANALYZER_DRIVER_PATH, js_program, relative_output_path)
    utilityModule.run_os_command(command, timeout=15 * 60)

    # store also the dynamic info inside the csv
    if args['hybrid'] == 'true':
        # (option name, loader) pairs, applied in this order; an option set to the
        # string 'false' excludes the corresponding input
        dynamic_inputs = (
            ('reqs', StateValues.add_requests_to_graph),
            ('evts', StateValues.add_events_to_graph),
            ('cookies', StateValues.add_cookies_to_graph),
            ('html', StateValues.add_dom_tree_snapshot_to_graph),
        )
        for option, add_to_graph in dynamic_inputs:
            if args[option] != 'false':
                add_to_graph(base_path, args[option])

    # import the constructed csv into an active neo4j database
    if args['import'] == 'true':
        API_neo4j_prepare(base_path)
Example #3
0
def API_build_property_graph_for_file(file_absolute_path,
                                      file_name,
                                      timeout=30 * 60):
    """
    builds a property graph database for a given file
    @param {string} file_absolute_path: folder containing the file; must contain either
        'hpg_construction/outputs' or 'hpg_construction/unit_tests'
    @param {string} file_name: name of the JavaScript file inside that folder
    @param {int} timeout: forwarded to run_os_command (default: 30 * 60)
    @description the analyzer output folder is the part of the path after the matched
        marker (prefixed with 'unit_tests' for unit tests), joined with the file name
        without its '.js' extension
    @return the value returned by run_os_command; exits with status 1 when the path
        contains neither marker
    """

    file_absolute_path_name = os.path.join(file_absolute_path, file_name)

    # Drop only a trailing '.js' extension. str.rstrip('.js') must not be used here:
    # it strips any trailing run of the characters '.', 'j' and 's', so that
    # e.g. 'utils.js' would become 'util'.
    js_extension = '.js'
    if file_name.endswith(js_extension):
        program_name = file_name[:-len(js_extension)]
    else:
        program_name = file_name

    outputs_marker = 'hpg_construction/outputs'
    unit_tests_marker = 'hpg_construction/unit_tests'

    if outputs_marker in file_absolute_path:
        # + 1 skips the path separator that follows the marker
        e_index = file_absolute_path.index(outputs_marker) + len(
            outputs_marker) + 1
        output_folder = file_absolute_path[e_index:]
    elif unit_tests_marker in file_absolute_path:
        e_index = file_absolute_path.index(unit_tests_marker) + len(
            unit_tests_marker) + 1
        output_folder = os.path.join("unit_tests",
                                     file_absolute_path[e_index:])
    else:
        logger.error(
            'input file for graph construction must be under the hpg_construction/outputs/ folder!'
        )
        sys.exit(1)

    output_folder = os.path.join(output_folder, program_name)

    command = "node --max-old-space-size=32000 %s -js %s -o %s" % (
        constantsModule.ANALYZER_DRIVER_PATH, file_absolute_path_name,
        output_folder)
    return run_os_command(command, timeout=timeout)
Example #4
0
    def _analyze():
        """
        builds the web property graph by running the analyzer driver with node
        @description reads `file_path`, which is not defined in this block (it comes
            from the enclosing scope); the output directory passed to the driver is
            `file_path` with the outputs-folder prefix removed and its last part dropped
        @return {void} None
        """

        # NOTE(review): this value is never used below; the call is kept unchanged
        _library_name_with_extension = utilityModule.get_directory_last_part(
            file_path)

        outputs_prefix = constantsModule.OUTPUT_NODES_RELS_PATH + '/'
        path_under_outputs = utilityModule.remove_part_from_str(
            file_path, outputs_prefix)
        output_directory = utilityModule.get_directory_without_last_part(
            path_under_outputs)

        driver_arguments = (constantsModule.ANALYZER_DRIVER_PATH, file_path,
                            output_directory)
        utilityModule.run_os_command("node %s -js %s -o %s" % driver_arguments)
Example #5
0
    def instrument_for_dynamic_js_constructs(
            file_path_name,
            create_new_file=False,
            new_file_name=constantsModule.NAME_JS_PROGRAM_INSTRUMENTED):
        """
        @param {string} file_path_name: input system path to a JS code
        @param {bool} create_new_file: if False, the input path is also used as the
            output path; otherwise the output is written next to the input under
            the name given in new_file_name
        @param {string} new_file_name: output file name, only used when create_new_file is set
        @description: runs the normalization/dynamic.js driver with node on the given
            JS program (presumably instrumenting its dynamic constructs, as the function
            name suggests; the driver itself is not visible here)
        @return None
        """

        output = file_path_name
        if create_new_file:
            parent_directory = utilityModule.get_directory_without_last_part(
                file_path_name)
            output = parent_directory + new_file_name

        driver_relative_path = 'hpg_construction/lib/jaw/normalization/dynamic.js'
        driver_path = os.path.join(constantsModule.BASE_DIR,
                                   driver_relative_path)

        # arguments in order: driver script, input file, output file
        utilityModule.run_os_command('node %s %s %s' %
                                     (driver_path, file_path_name, output))
Example #6
0
def import_data_inside_container(container_name,
                                 database_name,
                                 relative_import_path,
                                 mode='graphML'):
    """
    imports graph data into a neo4j database
    @param {string} container_name: docker container in which neo4j-admin is executed (CSV mode)
    @param {string} database_name: target database
    @param {string} relative_import_path: path relative to ./hpg_construction/outputs/
        in case of CSV: path of the folder containing nodes.csv, rels.csv
        in case of graphML: path of the graphML file
    @param {string} mode: type of input (options are 'CSV' or 'graphML')
    @return 1 in CSV mode, the result of DU.exec_fn_within_transaction in graphML mode,
        None for any other mode
    """

    if mode == 'graphML':
        return DU.exec_fn_within_transaction(
            import_data_inside_container_with_cypher, database_name,
            relative_import_path)

    if mode != 'CSV':
        # any other mode is silently ignored
        return None

    # inside the container, the inputs are looked up under the neo4j import directory
    import_directory = os.path.join('/var/lib/neo4j/import',
                                    relative_import_path)
    nodes_csv = os.path.join(import_directory, constants.NODE_INPUT_FILE_NAME)
    rels_csv = os.path.join(import_directory, constants.RELS_INPUT_FILE_NAME)

    # the two command lines differ only in the --mode=csv option
    # see: https://neo4j.com/docs/operations-manual/current/tools/neo4j-admin-import/#import-tool-option-skip-duplicate-nodes
    if constants.NEO4J_VERSION.startswith(constants.NEOJ_VERSION_4X):
        import_template = "neo4j-admin import --database=%s --nodes=%s --relationships=%s --delimiter='¿' --skip-bad-relationships=true --skip-duplicate-nodes=true"
    else:
        import_template = "neo4j-admin import --mode=csv --database=%s --nodes=%s --relationships=%s --delimiter='¿' --skip-bad-relationships=true --skip-duplicate-nodes=true"
    neo4j_import_cmd = import_template % (database_name, nodes_csv, rels_csv)

    # run the import directly inside the neo4j container with docker exec
    docker_exec_cmd = "docker exec -it %s %s" % (container_name,
                                                 neo4j_import_cmd)
    utilityModule.run_os_command(docker_exec_cmd,
                                 print_stdout=True,
                                 prettify=True)
    return 1
Example #7
0
def activate_existing_neo4j_db(database_name):
    """
    switches the active neo4j database to one that has been imported before
    @param {string} database_name which has the .db extension included
    @description stops neo4j, rewrites the dbms.active_database line of the neo4j
        config file with sed, starts neo4j again and waits 3 seconds
    @return {bool} True once neo4j has been restarted with the given database;
        False when the database folder does not exist or the platform is unsupported
        (in both of these cases neo4j has already been stopped and is not restarted)
    """

    # neo4j is stopped first, before any of the checks below
    run_os_command("neo4j stop")

    database_folder = os.path.join(constantsModule.NEO4J_DB_PATH,
                                   database_name)
    if not os.path.exists(database_folder):
        if constantsModule.DEBUG_PRINTS:
            logger.warning(
                "No neo4j database with name \'%s\' exists for activation!" %
                database_name)
        return False

    # change the active db: sed -i takes an extra '' argument on macOS
    new_config_line = "dbms.active_database=%s" % database_name
    current_platform = constantsModule.CURRENT_PLATFORM
    if current_platform == constantsModule.PLATFORMS['MAC_OS_X']:
        sed_template = """sed -i '' 's/dbms.active_database=.*db/%s/1' %s"""
    elif current_platform == constantsModule.PLATFORMS['Linux']:
        sed_template = """sed -i 's/dbms.active_database=.*db/%s/1' %s"""
    else:
        logger.error(
            'Detected unsupported platform. Check your enviornment variables (.env) file if your platform is supported but it is set wrong.'
        )
        return False

    change_active_graph_cmd = sed_template % (new_config_line,
                                              constantsModule.NEO4J_CONF)
    run_os_command(change_active_graph_cmd, print_stdout=False)

    run_os_command("neo4j start")

    if constantsModule.DEBUG_PRINTS:
        logger.info("Neo4J DB setup successful.")

    # short pause after the start command before reporting success
    time.sleep(3)
    return True
Example #8
0
def API_neo4j_prepare(csv_absolute_path,
                      nodes_name=constantsModule.NODE_INPUT_FILE_NAME,
                      relationships_name=constantsModule.RELS_INPUT_FILE_NAME,
                      load_dom_tree_if_exists=True):
    """
    @param {string} csv_absolute_path: absolute path to the graph node and relation csv files
    @param {string} nodes_name: the name of the CSV file for nodes
    @param {string} relationships_name: the name of the CSV file for edges or relationships
    @param {boolean} load_dom_tree_if_exists: if set, the function loads the HTML code of the dom snapshot to the property graph in addition to the html path
    @description imports graph csv files, and prepares the db:
        stops neo4j, deletes any database named after the csv folder, imports the csv
        files into a fresh '<folder>.db' database, makes it the active database in the
        neo4j config, restarts neo4j and waits 10 seconds
    @return {void} None; exits with status 1 when a csv file is missing or the
        platform is unsupported
    """

    # the last part of the csv folder path doubles as the database name
    folder_name_as_db_name = _get_last_subpath(csv_absolute_path)
    nodes_path = os.path.join(csv_absolute_path, nodes_name)
    rels_path = os.path.join(csv_absolute_path, relationships_name)

    if (not os.path.isfile(nodes_path)) or (not os.path.isfile(rels_path)):
        logger.error(
            'CSV nodes or relationships file does not exist for importing in the given directory: %s'
            % csv_absolute_path)
        sys.exit(1)

    # stop neo4j
    STOP_NEO4J_COMMAND = "neo4j stop"
    run_os_command(STOP_NEO4J_COMMAND)

    # a database left over from an earlier import of the same folder is deleted,
    # so the import below always starts from scratch
    DELETE_OLD_DB_IF_EXISTS_COMMAND = "rm -rf %s.db" % (os.path.join(
        constantsModule.NEO4J_DB_PATH, folder_name_as_db_name))
    run_os_command(DELETE_OLD_DB_IF_EXISTS_COMMAND)

    # import the data
    NEO4J_IMPORT_COMMAND = """neo4j-admin import --mode=csv --database=%s.db --nodes=%s --relationships=%s --delimiter='¿'""" % (
        folder_name_as_db_name, nodes_path, rels_path)
    run_os_command(NEO4J_IMPORT_COMMAND)

    # change the active db (sed -i takes an extra '' argument on macOS)
    new_config_line = "dbms.active_database=%s.db" % folder_name_as_db_name
    if constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS[
            'MAC_OS_X']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i '' 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    elif constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS[
            'Linux']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    else:
        logger.error(
            'Detected unsupported platform. Check your enviornment variables (.env) file if your platform is supported but it is set wrong.'
        )
        sys.exit(1)
    run_os_command(CHANGE_ACTIVE_GRAPH_COMMAND, print_stdout=False)

    # start neo4j
    START_NEO4J_COMMAND = "neo4j start"
    run_os_command(START_NEO4J_COMMAND)

    if constantsModule.DEBUG_PRINTS:
        logger.info("Neo4J DB setup in progress. Waiting for 10 seconds.")

    # give neo4j time to come up before it is queried below
    time.sleep(10)

    if load_dom_tree_if_exists:
        # copy the HTML stored at each snapshot's Location into its Code property
        for node in ORMModule.DOMSnapshot.nodes.all():
            # the with-block closes the file even when reading fails
            with open(node.Location, 'r') as fd:
                node.Code = fd.read()
            node.save()
Example #9
0
def stop_neo4j_container(container_name):
    """
    stops a docker container with `docker stop`
    @param {string} container_name: name of the container (converted with str() before use)
    @return {void} None
    """

    name = str(container_name)
    docker_stop_cmd = "docker stop %s" % name
    utilityModule.run_os_command(docker_stop_cmd, print_stdout=False)
    logger.warning('Docker container %s is being stopped.' % name)
Example #10
0
def start_neo4j_container(container_name):
    """
    starts an existing docker container with `docker start`
    @param {string} container_name: name of the container (converted with str() before use)
    @return {void} None
    """

    name = str(container_name)
    docker_start_cmd = "docker start %s" % name
    utilityModule.run_os_command(docker_start_cmd, print_stdout=False)
    logger.info('Docker container %s is starting.' % name)