def populateMetadata():
    """Scan the DatawaveMetadata table and cache per-day field counts.

    Aggregates the "f" column family into a ``{day: {row: count}}`` mapping,
    stores it as JSON in the 'metadata' cache under the key "field" for 48
    hours, and returns the JSON string. Returns None when no AccumuloCluster
    row is configured.
    """
    import pysharkbite
    import json
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    # NOTE(review): hard-coded root credentials — consider moving to settings.
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)
    indexTableOps = connector.tableOps("DatawaveMetadata")
    auths = pysharkbite.Authorizations()
    indexScanner = indexTableOps.createScanner(auths, 100)
    indexScanner.addRange(pysharkbite.Range())
    indexScanner.fetchColumn("f", "")
    ## load the combiner from the file system and send it to accumulo
    with open('countgatherer.py', 'r') as file:
        combinertxt = file.read()
    combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt, 200)
    indexScanner.addIterator(combiner)
    mapping = {}
    for indexKeyValue in indexScanner.getResultSet():
        key = indexKeyValue.getKey()
        value = indexKeyValue.getValue()
        if key.getColumnFamily() != "f":
            continue
        # Qualifier looks like "<datatype>\x00<day>" — presumably; the
        # original code split the same way. TODO confirm against the schema.
        day = key.getColumnQualifier().split("\u0000")[1]
        try:
            count = int(value.get())
        except (ValueError, TypeError):
            # Was a bare `except: pass` (three copies); skip non-integer
            # values the combiner failed to reduce.
            continue
        daymap = mapping.setdefault(day, {})
        daymap[key.getRow()] = daymap.get(key.getRow(), 0) + count
    caches['metadata'].set("field", json.dumps(mapping), 3600 * 48)
    return json.dumps(mapping)
def inity(self, replace=False):
    """(Re)initialize the Zookeeper instance and Accumulo auth state.

    Always rebuilds ``self._zk`` and ``self._user``; when *replace* is
    truthy, also rebuilds the connector plus the table/security operation
    handles derived from it.

    NOTE(review): the `if` scope was reconstructed from mangled source — the
    connector/tableOps/securityOps lines appear to sit inside it; confirm.
    """
    import pysharkbite
    self._zk = pysharkbite.ZookeeperInstance(self._instance, self._zks,
                                             1000, self._conf)
    self._user = pysharkbite.AuthInfo(self._username, self._password,
                                      self._zk.getInstanceId())
    # Idiom fix: `replace == True` -> truthiness test.
    if replace:
        self._connector = pysharkbite.AccumuloConnector(self._user, self._zk)
        self._tableOperations = self._connector.tableOps(self._table)
        self._securityOperations = self._connector.securityOps()
def get_uploads():
    """Refresh ingest-progress counters in the 'eventcount' cache.

    If any FileUpload row is NEW, marks ingest 95% complete and counts
    provenance-index entries (CONTENTURI vs. everything else) into the
    "ingestcount" / "useruploads" cache keys. With no NEW uploads, marks
    ingest 100% complete and returns early.
    """
    model = apps.get_model(app_label='query', model_name='FileUpload')
    objs = model.objects.filter(status="NEW")
    haveNew = False
    for obj in objs:
        if obj.status == "NEW":
            caches['eventcount'].set("ingestcomplete", 95)
            haveNew = True
    if not haveNew:
        caches['eventcount'].set("ingestcomplete", 100)
        return
    import pysharkbite
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)
    auths = pysharkbite.Authorizations()
    auths.addAuthorization("PROV")
    indexTableOps = connector.tableOps("provenanceIndex")
    indexScanner = indexTableOps.createScanner(auths, 10)
    indexScanner.addRange(pysharkbite.Range())
    indexScanner.fetchColumn("CONTENTURI", "")
    indexScanner.fetchColumn("TRANSITURI", "")
    count = 0
    usercount = 0
    try:
        for indexKeyValue in indexScanner.getResultSet():
            if indexKeyValue.getKey().getColumnFamily() == "CONTENTURI":
                count += 1
            else:
                usercount += 1
        if count > 0:
            caches['eventcount'].set("ingestcount", count, 3600 * 48)
        if usercount > 0:
            caches['eventcount'].set("useruploads", usercount, 3600 * 48)
        indexScanner.close()
    except RuntimeError:
        ## user does not have PROV
        # Was a bare `except:`; pysharkbite surfaces failures as RuntimeError
        # (see the retry loop elsewhere in this file) — best-effort skip.
        pass
# Script fragment: prompt for missing password/table, then connect to
# Accumulo and create the table if it does not already exist.
# NOTE(review): the trailing `try:` block has no `except` in this chunk —
# it presumably continues past the visible source; left untouched.
table = args.table
if not password:
    print("Please enter your password")
    password = input()
if not table:
    # Fallback table name when -t/--table was not supplied.
    table = "blahblahd"
import pysharkbite
conf = pysharkbite.Configuration()
conf.set ("FILE_SYSTEM_ROOT", "/accumulo");
zk = pysharkbite.ZookeeperInstance(args.instance, args.zookeepers, 1000, conf)
user = pysharkbite.AuthInfo(args.username, password, zk.getInstanceId())
try:
    connector = pysharkbite.AccumuloConnector(user, zk)
    tableOperations = connector.tableOps(table)
    # exists(False)/create(False): boolean flag semantics not shown here —
    # presumably "do not wait"; verify against pysharkbite docs.
    if not tableOperations.exists(False):
        print ("Creating table " + table)
        tableOperations.create(False)
    else:
        print (table + " already exists, so not creating it")
def mthd(self):
    """Exercise table create/write/remove and expected-error paths.

    Test-harness style: creates the table, writes mutations, removes the
    table mid-try and expects subsequent writer use to raise; then checks
    that invalid constructor/None arguments also raise. Exits the process
    with status 1 when an expected error does NOT occur.
    """
    import pysharkbite
    tableOperations = super().getTableOperations()
    ## validate that table is removed
    try:
        if not tableOperations.exists(False):
            print("Creating table")
            if not tableOperations.create(False):
                print("Could not create table")
        else:
            print("Table already exists, so not creating it")
        auths = pysharkbite.Authorizations()
        """ Add authorizations """
        """ mutation.put("cf","cq","cv",1569786960) """
        # Remove the table, then keep using a writer created afterwards —
        # every statement below is EXPECTED to fail and land in the except.
        tableOperations.remove()
        time.sleep(1)
        writer = tableOperations.createWriter(auths, 10)
        mutation = pysharkbite.Mutation("sow2")
        mutation.put("cf", "cq", "", 1569786960, "value")
        mutation.put("cf2", "cq2", "", 1569786960, "value2")
        """ no value """
        mutation.put("cf3", "cq3", "", 1569786960, "")
        writer.addMutation(mutation)
        writer.close()
        writer = tableOperations.createWriter(auths, 10)
        rng = range(0, 1000)
        for i in rng:
            row = ("row%i" % (i + 5))
            mutation = pysharkbite.Mutation(row)
            mutation.put("cf", "cq", "", 1569786960, "value")
            writer.addMutation(mutation)
        writer.close()
        # Reaching here means writes succeeded on a removed table: failure.
        print("Table not removed")
        sys.exit(1)
    except (RuntimeError, TypeError, NameError):
        print("Table successfully removed")  # this is okay
    try:
        # None arguments must be rejected by the ZookeeperInstance ctor.
        testzk = pysharkbite.ZookeeperInstance(None, None, 1000, None)
        print("Table not removed")
        sys.exit(1)
    except (RuntimeError, TypeError, NameError):
        print("Caught expected error")  # this is okay
    try:
        # None auths must be rejected by createWriter.
        writer = tableOperations.createWriter(None, 10)
        print("Expected error passing None")
        sys.exit(1)
    except (RuntimeError, TypeError, NameError):
        print("Caught expected error")  # this is okay
    tableOperations.remove()
def __init__(self):
    """Connect to Zookeeper using the first configured AccumuloCluster row.

    Fix: the original issued ``AccumuloCluster.objects.first()`` twice (two
    identical DB queries, and a race if the row changes between them); fetch
    once and reuse.
    NOTE(review): still raises AttributeError if no cluster row exists, and
    `conf` is presumably a module-level Configuration — confirm.
    """
    cluster = AccumuloCluster.objects.first()
    self.zk = pysharkbite.ZookeeperInstance(cluster.instance,
                                            cluster.zookeeper, 1000, conf)
def __init__(self):
    """Parse CLI arguments, load the sharkbite shared object, and connect.

    Requires -i/-z/-u; -p/-t/-s are optional. Loads the shared object at
    *shardobject* via ctypes before importing pysharkbite, then builds the
    connector and table-operations handle for *table*.

    Fix: dropped unused locals ``instance``/``zookeepers`` (the args values
    were used directly below).
    """
    parser = ArgumentParser(
        description="This is an Apache Accummulo Python connector")
    parser.add_argument("-i", "--instance", dest="instance",
                        help="Apache Accumulo Instance Name", required=True)
    parser.add_argument("-z", "--zookeepers", dest="zookeepers",
                        help="Comma Separated Zookeeper List", required=True)
    parser.add_argument("-u", "--username", dest="username",
                        help="User to access Apache Accumulo", required=True)
    parser.add_argument(
        "-p", "--password", dest="password",
        help=
        "Password to access Apache Accumulo. May also be supplied at the command line"
    )
    parser.add_argument("-t", "--table", dest="table",
                        help="Table to create/update")
    parser.add_argument("-s", "--solocation", dest="shardobject",
                        help="Shared object Location")
    args = parser.parse_args()
    password = args.password
    table = args.table
    dll = args.shardobject
    print("Opening ", dll)
    # Keep the handle so the shared object stays loaded for pysharkbite.
    py = cdll.LoadLibrary(dll)
    import pysharkbite
    self._conf = pysharkbite.Configuration()
    self._conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    self._zk = pysharkbite.ZookeeperInstance(args.instance, args.zookeepers,
                                             1000, self._conf)
    self._user = pysharkbite.AuthInfo(args.username, password,
                                      self._zk.getInstanceId())
    self._connector = pysharkbite.AccumuloConnector(self._user, self._zk)
    self._tableOperations = self._connector.tableOps(table)
    print("created connector")
def run(*args):
    """Seed demo auth/config rows and create the standard Accumulo tables.

    Accepts "key=value" override strings among *args*: password, username,
    instance, zookeepers, accuser, accpass. Saves an AccumuloCluster row,
    then retries the Accumulo connection until the instance id appears in
    Zookeeper, granting MTRCS/PROV auths and creating any missing tables.

    Fixes:
    - "instance=..." previously overwrote ``admin_user`` instead of
      ``instance`` (copy-paste bug).
    - ``str.find(...) > 0`` missed a match at index 0 (find returns 0 there);
      replaced with an ``in`` test.
    - Seven duplicated exists/create stanzas consolidated into one loop.
    """
    admin_user = "******"
    admin_password = "******"
    auth = Auth()
    auth.auth = "TST"
    auth.save()
    democonfig = IngestConfiguration()
    democonfig.name = "democonfig"
    democonfig.use_provenance = True
    democonfig.post_location = "http://nifi:8181/contentListener"
    democonfig.save()
    instance = "uno"
    zookeepers = "192.168.1.88:2181"
    acc_user = "******"
    acc_pass = "******"
    for arg in args:
        split = arg.split("=")
        if len(split) != 2:
            continue
        key, value = split
        if key.startswith("password"):
            admin_password = value
        elif key.startswith("username"):
            admin_user = value
        elif key.startswith("instance"):
            instance = value  # BUG FIX: used to assign admin_user
        elif key.startswith("zookeepers"):
            zookeepers = value
        elif key.startswith("accuser"):
            acc_user = value
        elif key.startswith("accpass"):
            acc_pass = value
    acc = AccumuloCluster()
    acc.instance = instance
    acc.zookeeper = zookeepers
    acc.user = acc_user
    acc.password = acc_pass
    acc.save()
    import pysharkbite
    while True:
        try:
            conf = pysharkbite.Configuration()
            conf.set("FILE_SYSTEM_ROOT", "/accumulo")
            zoo_keeper = pysharkbite.ZookeeperInstance(instance, zookeepers,
                                                       1000, conf)
            user = pysharkbite.AuthInfo(acc_user, acc_pass,
                                        zoo_keeper.getInstanceId())
            connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
            security_ops = connector.securityOps()
            auths = pysharkbite.Authorizations()
            auths.addAuthorization("MTRCS")
            auths.addAuthorization("PROV")
            security_ops.grantAuthorizations(auths, acc_user)
            for table_name in ("shard", "shardIndex", "shardReverse",
                               "graph", "provenance", "provenanceIndex",
                               "provenanceReverseIndex"):
                table_ops = connector.tableOps(table_name)
                if not table_ops.exists(False):
                    table_ops.create(False)
            break
        except RuntimeError as e:
            # Zookeeper may not publish the instance id yet — retry until it
            # does; any other RuntimeError aborts the loop.
            if "Instance Id does not exist" in str(e):
                pass
            else:
                break
def pouplateEventCountMetadata():
    """Cache per-day EVENT_COUNT totals from the DatawaveMetrics table.

    Considers the last 15 days, skips days already present in the
    'eventcount' cache, sums EVENT_COUNT entries per day (row prefix before
    "_"), and caches each total for 48 hours.

    (Function name typo preserved — callers reference it as-is.)
    Fixes: removed a duplicate AuthInfo/connector construction, unused
    ``counts``/``arr``/``start`` locals and the unused ``time`` import, and
    narrowed the bare ``except:``.
    """
    import pysharkbite
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zoo_keeper = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zoo_keeper.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
    # last 15 days — only query shards whose counts are not already cached
    queryRanges = []
    for dateinrange in getDateRange(-15):
        shardbegin = dateinrange.strftime("%Y%m%d")
        cached = caches['eventcount'].get(shardbegin)
        if cached is None or cached == 0:
            queryRanges.append(shardbegin)
    if not queryRanges:
        return  # all is cached
    indexTableOps = connector.tableOps("DatawaveMetrics")
    auths = pysharkbite.Authorizations()
    auths.addAuthorization("MTRCS")
    indexScanner = indexTableOps.createScanner(auths, 100)
    for dt in queryRanges:
        indexScanner.addRange(pysharkbite.Range(dt, True, dt + "\uffff",
                                                False))
    indexScanner.fetchColumn("EVENT_COUNT", "")
    ## load the combiner from the file system and send it to accumulo
    with open('metricscombiner.py', 'r') as file:
        combinertxt = file.read()
    combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt, 200)
    indexScanner.addIterator(combiner)
    mapping = {}
    try:
        for indexKeyValue in indexScanner.getResultSet():
            key = indexKeyValue.getKey()
            if key.getColumnFamily() == "EVENT_COUNT":
                # Row looks like "<yyyymmdd>_<suffix>"; sum per day.
                day = key.getRow().split("_")[0]
                mapping[day] = mapping.get(day, 0) + int(
                    indexKeyValue.getValue().get())
        for field in mapping:
            caches['eventcount'].set(field, str(mapping[field]), 3600 * 48)
    except (RuntimeError, ValueError):
        # Was a bare `except:` — best-effort: skip on scan failures or
        # non-integer combiner output.
        pass
def __init__(self):
    """Connect to Zookeeper using the configured AccumuloCluster row.

    BUG FIX: the query result was bound to ``AccumuloCluster`` but then read
    as ``accumulo_cluster`` — a guaranteed NameError. Bind the name actually
    used.
    NOTE(review): `conf` is not defined in this block — presumably a
    module-level Configuration; confirm. Also raises AttributeError when no
    cluster row exists.
    """
    import pysharkbite
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    self.zoo_keeper = pysharkbite.ZookeeperInstance(
        accumulo_cluster.instance, accumulo_cluster.zookeeper, 1000, conf)
def check():
    """Mark NEW FileUpload rows COMPLETE once a DROP event is provenance-recorded.

    For each NEW upload, looks up its UUID in the provenanceIndex table,
    decodes the Uid protobuf hit list, then scans the provenance table for
    each (shard, datatype, uid) and flips the upload's status when an
    EVENTTYPE == DROP field is found.

    NOTE(review): reconstructed from whitespace-mangled source — the nesting
    of the scanner.close()/indexScanner.close() calls is a best guess.
    """
    model = apps.get_model(app_label='query', model_name='FileUpload')
    objs = model.objects.filter(status="NEW")
    for obj in objs:
        if obj.status == "NEW":
            import pysharkbite
            conf = pysharkbite.Configuration()
            conf.set("FILE_SYSTEM_ROOT", "/accumulo")
            model = apps.get_model(app_label='query',
                                   model_name='AccumuloCluster')
            accumulo_cluster = model.objects.first()
            print("Checking " + str(obj.uuid))
            if accumulo_cluster is None:
                return
            print("Checking " + str(obj.uuid))
            zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
            user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
            connector = pysharkbite.AccumuloConnector(user, zk)
            indexTableOps = connector.tableOps("provenanceIndex")
            auths = pysharkbite.Authorizations()
            auths.addAuthorization("PROV")
            indexScanner = indexTableOps.createScanner(auths, 2)
            # Index lookup keyed by the upload's UUID.
            indexrange = pysharkbite.Range(str(obj.uuid))
            indexScanner.addRange(indexrange)
            indexSet = indexScanner.getResultSet()
            rangelist = list()
            provops = connector.tableOps("provenance")
            scanner = provops.createScanner(auths, 10)
            for indexKeyValue in indexSet:
                value = indexKeyValue.getValue()
                # Index value is a serialized Uid list protobuf.
                protobuf = Uid_pb2.List()
                protobuf.ParseFromString(value.get().encode())
                for uidvalue in protobuf.UID:
                    # Qualifier is "<shard>\x00<datatype>".
                    shard = indexKeyValue.getKey().getColumnQualifier().split(
                        "\u0000")[0]
                    datatype = indexKeyValue.getKey().getColumnQualifier(
                    ).split("\u0000")[1]
                    startKey = pysharkbite.Key()
                    stopKey = pysharkbite.Key()
                    startKey.setRow(shard)
                    stopKey.setRow(shard)
                    # Range over one event: cf "<datatype>\x00<uid>"..."\xff".
                    startKey.setColumnFamily(datatype + "\x00" + uidvalue)
                    stopKey.setColumnFamily(datatype + "\x00" + uidvalue +
                                            "\xff")
                    rangelist.append(
                        pysharkbite.Range(startKey, True, stopKey, False))
                    # A fresh scanner per uid (the one above is replaced).
                    scanner = provops.createScanner(auths, 10)
                    scanner.addRange(
                        pysharkbite.Range(startKey, True, stopKey, False))
                    resultset = scanner.getResultSet()
                    for keyvalue in resultset:
                        key = keyvalue.getKey()
                        value = keyvalue.getValue()
                        # cf is "<datatype>\x00<eventid>",
                        # cq is "<fieldname>\x00<fieldvalue>".
                        eventid = key.getColumnFamily().split("\u0000")[1]
                        fieldname = key.getColumnQualifier().split(
                            "\u0000")[0]
                        fieldvalue = key.getColumnQualifier().split(
                            "\u0000")[1]
                        if (fieldname == "EVENTTYPE"):
                            if fieldvalue == "DROP":
                                obj.status = "COMPLETE"
                                obj.save()
                                break
                    scanner.close()
            indexScanner.close()
def run_edge_query(query_id):
    """Execute a stored EdgeQuery against the graph table and persist results.

    Marks each matching EdgeQuery running, scans graph rows whose key starts
    at ``obj.query``, decodes EdgeValue protobufs into Result rows attached
    to the query's ScanResult (capped at 1000), then marks the query
    finished.

    Fixes: renamed local ``range`` (shadowed the builtin); removed the bare
    ``except:`` clauses that followed ``except Exception`` — they could only
    catch BaseException and so silently swallowed KeyboardInterrupt /
    SystemExit.
    """
    model = apps.get_model(app_label='query', model_name='EdgeQuery')
    objs = model.objects.filter(query_id=query_id)
    for obj in objs:
        obj.running = True
        obj.save()
        import pysharkbite
        conf = pysharkbite.Configuration()
        conf.set("FILE_SYSTEM_ROOT", "/accumulo")
        model = apps.get_model(app_label='query',
                               model_name='AccumuloCluster')
        accumulo_cluster = model.objects.first()
        if accumulo_cluster is None:
            return
        zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                           accumulo_cluster.zookeeper, 1000,
                                           conf)
        user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
        connector = pysharkbite.AccumuloConnector(user, zk)
        auths = pysharkbite.Authorizations()
        if obj.auths:
            for auth in obj.auths.split(","):
                auths.addAuthorization(auth)
        sres_model = apps.get_model(app_label='query',
                                    model_name='ScanResult')
        res_model = apps.get_model(app_label='query', model_name='Result')
        sr = sres_model.objects.filter(query_id=obj.query_id).first()
        if not sr:
            print("No scan result, returning")
            return
        print("here")
        graphTableOps = connector.tableOps("graph")
        scanner = graphTableOps.createScanner(auths, 10)
        ## for now the range should be this
        query_range = pysharkbite.Range(obj.query, True,
                                        obj.query + "\uffff" + "\uffff",
                                        False)
        scanner.addRange(query_range)
        count = 0
        try:
            for indexKeyValue in scanner.getResultSet():
                value = "0"
                ## row will be the to
                ## direction will be the cf
                to_value = ""
                direction = "one"
                try:
                    protobuf = EdgeData_pb2.EdgeValue()
                    protobuf.ParseFromString(
                        indexKeyValue.getValue().get_bytes())
                    value = str(protobuf.count) + "/" + protobuf.uuid_string
                    to_value = indexKeyValue.getKey().getRow().split(
                        "\u0000")[1]
                    direction = indexKeyValue.getKey().getColumnFamily(
                    ).split("/")[1]
                    direction_split = direction.split("-")
                    # Skip self-edges and malformed direction labels.
                    if len(direction_split) != 2 or \
                            direction_split[0] == direction_split[1]:
                        continue
                except Exception as e:
                    print(e)
                    continue
                scanresult = res_model.objects.create(
                    scanResult=sr,
                    value=value,
                    row=to_value,
                    cf=direction,
                    cq=indexKeyValue.getKey().getColumnQualifier())
                scanresult.save()
                count = count + 1
                if count > 1000:
                    break
            sr.is_finished = True
            sr.save()
            scanner.close()
        except Exception as e:
            ## user does not have PROV
            print(e)
        obj.running = False
        obj.finished = True
        obj.save()