Example #1
0
def output_kafka(graph_db, registry,
                 kafka_url=None):
    """Send every node and rel of ``graph_db`` to the kafka topic ``cs``.

    The messages are whatever ``out.output_json(graph_db, None, None,
    as_list=True)`` returns: a pair ``(nodes, rels)`` of iterables, each
    element being sent as one kafka message (nodes first, then rels).

    :param graph_db: graph store handed through to ``out.output_json``.
    :param registry: configuration registry. When ``kafka_url`` is ``None``
        the URL is read from its ``"kafka_url"`` entry (default
        ``"localhost:9092"``); otherwise the given URL is stored there.
    :param kafka_url: optional kafka broker URL overriding the registry.
    """
    ldict = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
             "hostname": platform.node().split(".")[0]}
    l = logging.LoggerAdapter(common.fetch_lg(), ldict)
    kafka_topic = "cs"
    if kafka_url is None:
        kafka_url = registry.get_config("kafka_url",
                                        "localhost:9092")
    else:
        l.info("Updating registry with kafka_url: {}".format(kafka_url))
        registry.put_config("kafka_url",
                            kafka_url)
    (nodes, rels) = out.output_json(graph_db, None, None, as_list=True)
    l.info("Connecting to kafka_url {}".format(kafka_url))
    kafka = KafkaClient(kafka_url)
    # Always release the client connection, even when a send fails.
    try:
        # NOTE(review): a SimpleProducer built without extra arguments is
        # presumably the synchronous variant (async sending is opt-in in
        # kafka-python) -- confirm against the installed library version.
        producer = SimpleProducer(kafka)
        l.info("Sending nodes to kafka {}/{}".format(kafka_url, kafka_topic))
        for node_msg in nodes:
            producer.send_messages(kafka_topic, node_msg)
        l.info("Sending rels to kafka {}/{}".format(kafka_url, kafka_topic))
        for rel_msg in rels:
            producer.send_messages(kafka_topic, rel_msg)
    finally:
        kafka.close()
Example #2
0
def output_json(graph_db,
                out_file_nodes,
                out_file_rels,
                as_list=False):
    """Serialise all rels of ``graph_db`` and their endpoint nodes as json.

    Every rel type in ``constants.CONST_REL_TYPE`` is walked; each rel is
    collected by its id together with its source and destination nodes.

    :param graph_db: callable returning the store for a given node/rel type.
    :param out_file_nodes: path for the node lines; ``None`` means
        ``/dev/stdout``.
    :param out_file_rels: path for the rel lines; ``None`` means
        ``/dev/stdout``.
    :param as_list: when true, nothing is written; the converted node list
        and rel list are returned as a tuple ``(nodes, rels)`` instead.
    :return: ``(nodes, rels)`` when ``as_list`` is true, otherwise ``None``.
    """
    log_extra = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
                 "hostname": platform.node().split(".")[0]}
    log = logging.LoggerAdapter(common.fetch_lg(), log_extra)

    # Fall back to stdout for any destination that was not given.
    nodes_path = "/dev/stdout" if out_file_nodes is None else out_file_nodes
    rels_path = "/dev/stdout" if out_file_rels is None else out_file_rels

    nodes_by_id = {}
    rels_by_id = {}
    log.info("Iterating all nodes and rels")
    for rel_type in constants.CONST_REL_TYPE:
        for _key, rel in graph_db(rel_type).iteritems():
            rels_by_id[rel.id()] = rel
            # Resolve both endpoints of the rel in their typed stores.
            src_store = graph_db(constants.getNodeType(rel.srcType()))
            src_node = src_store[rel.srcHash()]
            nodes_by_id[rel.srcId()] = src_node
            dst_store = graph_db(constants.getNodeType(rel.dstType()))
            dst_node = dst_store[rel.dstHash()]
            nodes_by_id[rel.dstId()] = dst_node

    if as_list:
        node_lines = _convert_node_json(nodes_by_id, log)
        rel_lines = _convert_rel_json(rels_by_id, nodes_by_id, log)
        return (node_lines, rel_lines)

    if nodes_path:
        node_lines = _convert_node_json(nodes_by_id, log)
        with open(nodes_path, 'w') as node_fh:
            for line in node_lines:
                node_fh.write(line + "\n")
            log.info("wrote node list as json to {}".format(nodes_path))

    if rels_path:
        rel_lines = _convert_rel_json(rels_by_id, nodes_by_id, log)
        with open(rels_path, 'w') as rel_fh:
            for line in rel_lines:
                rel_fh.write(line + "\n")
            log.info("wrote rel list as json to {}".format(rels_path))
Example #3
0
                       type=str, default=None,
                       help='store results in kafka, specify kafka router url')
# --outputjson takes exactly two paths: the nodes file, then the rels file.
group_out.add_argument('--outputjson', metavar="FILE.json",
                       type=str, default=None, nargs=2,
                       help='store results in json files, specify two '
                       'files. First file for nodes, second for rels')


# Parse the command line using the options declared on `parser`.
args = parser.parse_args()

# Set up logging through common.setup_logger; the level comes from
# args.verbose and logfile=None is passed.
lg = common.setup_logger(level=args.verbose,
                         logfile=None)
# Short hostname: the part of the node name before the first dot.
hs = platform.node().split(".")[0]
# Extra context dict handed to the LoggerAdapter used by this script.
ldict = {"step": "main",
         "hostname": hs}
l = logging.LoggerAdapter(common.fetch_lg(), ldict)


if args.inputlog:
    if not os.path.exists(args.inputlog):
        l.error("input log file does not exist")
        sys.exit(1)
    g = Node.myGraph(cache=None,
                     inputfile=args.inputlog,
                     etcdhost=args.etcdhost,
                     etcdport=args.etcdport)
elif args.inputlocal:
    g = Node.myGraph(cache=None,
                     etcdhost=args.etcdhost,
                     etcdport=args.etcdport)
else:
Example #4
0
def output_neo4j(graph_db, registry, neo4j_url=None, debug=False):
    """Write every rel of ``graph_db`` and its endpoint nodes to neo4j.

    The export runs as three write batches: (1) get-or-create each node in
    the ``"vid"`` index, (2) set each node's label and add it to the
    ``"Name"`` index under its ``name`` property, (3) get-or-create a path
    for each rel between its source and destination nodes.

    :param graph_db: callable returning the store for a given node/rel type.
    :param registry: configuration registry. When ``neo4j_url`` is ``None``
        the URL is read from its ``"neo4j_url"`` entry (default
        ``'http://localhost:7474/db/data'``); otherwise the given URL is
        stored there.
    :param neo4j_url: optional neo4j REST URL overriding the registry.
    :param debug: when true, log the results of the node and rel batches.
    """
    ldict = {"step": MODULEFILE + "/" + inspect.stack()[0][3],
             "hostname": platform.node().split(".")[0]}
    l = logging.LoggerAdapter(common.fetch_lg(), ldict)
    nod_obj = {}
    rel_obj = {}
    # Collect all rels by id, plus the source/destination node of each rel.
    for r in constants.CONST_REL_TYPE:
        for k, v in graph_db(r).iteritems():
            rel_obj[v.id()] = v
            nod_obj[v.srcId()] = \
                graph_db(constants.getNodeType(v.srcType()))[v.srcHash()]
            nod_obj[v.dstId()] = \
                graph_db(constants.getNodeType(v.dstType()))[v.dstHash()]
    if neo4j_url is None:
        neo4j_url = registry.get_config("neo4j_url",
                                        'http://localhost:7474/db/data')
    else:
        l.info("Updating registry with neo4j_url: {}".format(neo4j_url))
        registry.put_config("neo4j_url",
                            neo4j_url)
    l.info("Connecting to neo4j url {}".format(neo4j_url))
    neo4j_db = neo4j.GraphDatabaseService(neo4j_url)

    # write all nodes
    batch = neo4j.WriteBatch(neo4j_db)
    for k, v in nod_obj.iteritems():
        batch.\
            get_or_create_indexed_node("vid",
                                       "vid",
                                       k,
                                       v.property())
    l.info("Submitting node data to neo4j")
    result = batch.submit()
    if debug:
        results = "\t\t" + "".join("\n\t\t{}".format(i) for i in result)
        l.info("Got all node: {}".format(results))

    # Pair each node id with its batch result. This presumes the batch
    # returns results in submission order; nod_obj is not modified in
    # between, so keys() matches the iteritems() order used above.
    nod_neo = dict(zip(nod_obj.keys(), result))

    # add node labels and index it
    batch = neo4j.WriteBatch(neo4j_db)
    for k, v in nod_obj.iteritems():
        batch.set_labels(nod_neo[k],
                         constants.getNodeType(v.type()))
        batch.add_to_index(neo4j.Node,
                           "Name",
                           "name",
                           v.property()["name"],
                           nod_neo[k])
    l.info("Submitting node index/label data to neo4j")
    batch.submit()

    # add rel
    batch = neo4j.WriteBatch(neo4j_db)
    for k, v in rel_obj.iteritems():
        batch.get_or_create_path(nod_neo[v.srcId()],
                                 constants.getRelType(v.type()),
                                 nod_neo[v.dstId()])
    l.info("Submitting rel data to neo4j")
    result = batch.submit()

    if debug:
        results = "\t\t" + "".join("\n\t\t{}".format(i) for i in result)
        # These are the rel batch results, so label them as such.
        l.info("Got all rel: {}".format(results))