def populateMetadata():
    # apps and caches are Django's app registry and cache framework,
    # assumed imported at module level
    import json
    import pysharkbite

    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)
    indexTableOps = connector.tableOps("DatawaveMetadata")
    auths = pysharkbite.Authorizations()
    indexScanner = indexTableOps.createScanner(auths, 100)
    indexrange = pysharkbite.Range()
    indexScanner.addRange(indexrange)
    indexScanner.fetchColumn("f", "")
    # load the combiner from the file system and send it to Accumulo
    with open('countgatherer.py', 'r') as file:
        combinertxt = file.read()
    combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt, 200)
    indexScanner.addIterator(combiner)
    indexSet = indexScanner.getResultSet()
    mapping = {}
    for indexKeyValue in indexSet:
        value = indexKeyValue.getValue()
        key = indexKeyValue.getKey()
        if key.getColumnFamily() == "f":
            # the "f" column qualifier is "<datatype>\x00<yyyyMMdd>"
            day = key.getColumnQualifier().split("\u0000")[1]
            if day not in mapping:
                mapping[day] = {}
            try:
                # sum the counts per field (row) per day
                mapping[day][key.getRow()] = mapping[day].get(
                    key.getRow(), 0) + int(value.get())
            except ValueError:
                pass  # skip values that are not integer counts
    caches['metadata'].set("field", json.dumps(mapping), 3600 * 48)
    return json.dumps(mapping)
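# Usage sketch (hypothetical caller; assumes Django's cache framework and the
# query.AccumuloCluster model are configured). populateMetadata() returns the
# per-day field counts as a JSON string, or None when no cluster is defined:
#
#   import json
#   payload = populateMetadata()
#   if payload is not None:
#       field_counts = json.loads(payload)
#       for day, fields in field_counts.items():
#           print(day, sum(fields.values()))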
# assumes module-level imports; CancellationToken and LookupInformation are
# application classes defined elsewhere
import queue
import time

import pysharkbite


def getDocuments(cancellationtoken: CancellationToken, name: int,
                 lookupInformation: LookupInformation,
                 input: queue.SimpleQueue, outputQueue: queue.SimpleQueue):
    def buildRange(docInfo):
        # document entries live at row <shard>, column family <datatype>\x00<docid>
        docid = docInfo.getDataType() + "\x00" + docInfo.getDocId()
        startKey = pysharkbite.Key()
        startKey.setRow(docInfo.getShard())
        startKey.setColumnFamily(docid)
        endKey = pysharkbite.Key()
        endKey.setRow(docInfo.getShard())
        endKey.setColumnFamily(docid + "\xff")
        print("Searching for " + docInfo.getShard())
        return pysharkbite.Range(startKey, True, endKey, True)

    count = 0
    while cancellationtoken.running():
        docInfo = None
        try:
            try:
                if not input.empty():
                    docInfo = input.get(timeout=1)
            except queue.Empty:
                pass  # raced with another consumer; nothing to do
            if docInfo is not None:
                tableOps = lookupInformation.getTableOps()
                scanner = tableOps.createScanner(lookupInformation.getAuths(), 5)
                scanner.addRange(buildRange(docInfo))
                # batch up to ten document ranges onto a single scanner
                rangecount = 1
                while rangecount < 10:
                    try:
                        scanner.addRange(buildRange(input.get(False)))
                        rangecount += 1
                    except queue.Empty:
                        break
                # load the combiner from the file system and send it to Accumulo
                with open('jsoncombiner.py', 'r') as file:
                    combinertxt = file.read()
                combiner = pysharkbite.PythonIterator("PythonCombiner",
                                                      combinertxt, 100)
                scanner.addIterator(combiner)
                try:
                    count += scanDoc(scanner, outputQueue)
                except Exception:
                    pass  # a failed scan should not kill the worker
            else:
                time.sleep(0.5)
        except Exception:
            pass  # swallow unexpected errors so the worker keeps running
    return True
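# scanDoc() is referenced above but not defined in this excerpt. A minimal
# sketch under the assumption that it drains the scanner's result set onto the
# output queue and returns how many entries it saw; the real implementation
# may differ.
def scanDoc(scanner, outputQueue):
    count = 0
    for keyValue in scanner.getResultSet():
        # hand the combined document value to the consuming thread
        outputQueue.put(keyValue.getValue().get())
        count += 1
    return count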
scanner = tableOperations.createScanner(auths, 2)
# scan the rows between "row" and "row3" using the keys built below
startKey = pysharkbite.Key()
endKey = pysharkbite.Key()
startKey.setRow("row")
endKey.setRow("row3")
accumuloRange = pysharkbite.Range(startKey, True, endKey, False)
scanner.addRange(accumuloRange)
iterator = pysharkbite.PythonIterator("PythonIterator", 100)
# rewrite each key server-side, replacing the column family
iterator = iterator.onNext(
    "lambda x : sharkbite_iterator.Key( x.getRow(), 'new cf', x.getColumnQualifier()) "
)
scanner.addIterator(iterator)
resultset = scanner.getResultSet()
for keyvalue in resultset:
    key = keyvalue.getKey()
    value = keyvalue.getValue()
    print(key.getRow() + ":" + key.getColumnFamily() + ":" +
          key.getColumnQualifier() + " [" + key.getColumnVisibility() +
          "] -- " + value.get())
""" delete your table if user did not create temp """
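# Note: the snippet above assumes `tableOperations` and `auths` already exist;
# a sketch of how they could be obtained, mirroring the connector setup in
# populateMetadata() (the table name here is hypothetical):
#
#   connector = pysharkbite.AccumuloConnector(user, zk)
#   tableOperations = connector.tableOps("exampleTable")
#   auths = pysharkbite.Authorizations()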
def mthd(self):
    import sys
    import pysharkbite

    tableOperations = super().getTableOperations()
    if not tableOperations.exists(False):
        print("Creating table")
        if not tableOperations.create(False):
            print("Could not create table")
    else:
        print("Table already exists, so not creating it")
    auths = pysharkbite.Authorizations()
    """ Add authorizations """
    """ mutation.put("cf","cq","cv",1569786960) """
    writer = tableOperations.createWriter(auths, 10)
    mutation = pysharkbite.Mutation("sow2")
    mutation.put("cf", "cq", "", 1569786960, "value")
    mutation.put("cf2", "cq", "", 1569786960, "value")
    """ no value """
    mutation.put("cf3", "cq", "", 1569786960, "value")
    writer.addMutation(mutation)
    writer.close()
    writer = tableOperations.createWriter(auths, 10)
    for i in range(0, 1000):
        mutation = pysharkbite.Mutation("row%i" % (i + 5))
        mutation.put("cf", "cq", "", 1569786960, "value")
        writer.addMutation(mutation)
    writer.close()
    print("written")
    """ auths.addAuthorization("cv") """
    # first scan: the iterator rewrites the column family and drops the value
    scanner = tableOperations.createScanner(auths, 2)
    accumuloRange = pysharkbite.Range("sow", True, "sow3", False)
    scanner.addRange(accumuloRange)
    iterator = pysharkbite.PythonIterator("PythonIterator", 100)
    iterator = iterator.onNext(
        "lambda x : KeyValue( Key( x.getKey().getRow(), 'new cf', x.getKey().getColumnQualifier()), Value()) "
    )
    scanner.addIterator(iterator)
    resultset = scanner.getResultSet()
    for keyvalue in resultset:
        key = keyvalue.getKey()
        assert "sow2" == key.getRow()
        value = keyvalue.getValue()
        if "cf" == key.getColumnFamily():
            sys.exit(154)  # the original column family should have been rewritten
        if "new cf" == key.getColumnFamily():
            assert "" == value.get()
    # second scan: the iterator rewrites the column qualifier
    scanner = tableOperations.createScanner(auths, 2)
    accumuloRange = pysharkbite.Range("sow", True, "sow3", False)
    scanner.addRange(accumuloRange)
    iterator = pysharkbite.PythonIterator("PythonIterator", 100)
    iterator = iterator.onNext(
        "lambda x : Key( x.getKey().getRow(), x.getKey().getColumnFamily(), 'new cq') "
    )
    scanner.addIterator(iterator)
    resultset = scanner.getResultSet()
    for keyvalue in resultset:
        key = keyvalue.getKey()
        assert "sow2" == key.getRow()
        value = keyvalue.getValue()
        if "cq" == key.getColumnQualifier():
            sys.exit(154)  # the original column qualifier should have been rewritten
    """ delete your table if user did not create temp """
    tableOperations.remove()
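# For reference: the onNext lambdas above execute inside the scan and rewrite
# each entry before it reaches the client. The first lambda is roughly
# equivalent to this plain function (a sketch; Key, Value, and KeyValue are
# the classes exposed to the iterator environment):
#
#   def on_next(x):
#       key = x.getKey()
#       return KeyValue(Key(key.getRow(), 'new cf', key.getColumnQualifier()),
#                       Value())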
def populateEventCountMetadata():
    import pysharkbite

    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zoo_keeper = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zoo_keeper.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
    queryRanges = list()
    # query the last 15 days, skipping any day that is already cached
    for dateinrange in getDateRange(-15):
        shardbegin = dateinrange.strftime("%Y%m%d")
        cached = caches['eventcount'].get(shardbegin)
        if cached is None or cached == 0:
            queryRanges.append(shardbegin)
    if len(queryRanges) > 0:  # otherwise every day is already cached
        indexTableOps = connector.tableOps("DatawaveMetrics")
        auths = pysharkbite.Authorizations()
        auths.addAuthorization("MTRCS")
        indexScanner = indexTableOps.createScanner(auths, 100)
        for dt in queryRanges:
            indexrange = pysharkbite.Range(dt, True, dt + "\uffff", False)
            indexScanner.addRange(indexrange)
        indexScanner.fetchColumn("EVENT_COUNT", "")
        # load the combiner from the file system and send it to Accumulo
        with open('metricscombiner.py', 'r') as file:
            combinertxt = file.read()
        combiner = pysharkbite.PythonIterator("MetadataCounter",
                                              combinertxt, 200)
        indexScanner.addIterator(combiner)
        indexSet = indexScanner.getResultSet()
        mapping = {}
        try:
            for indexKeyValue in indexSet:
                value = indexKeyValue.getValue()
                key = indexKeyValue.getKey()
                if key.getColumnFamily() == "EVENT_COUNT":
                    # rows look like "<yyyyMMdd>_<shard>"; sum counts per day
                    day = key.getRow().split("_")[0]
                    mapping[day] = mapping.get(day, 0) + int(value.get())
            for day in mapping:
                caches['eventcount'].set(day, str(mapping[day]), 3600 * 48)
        except Exception:
            pass  # leave the cache untouched if the scan fails
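# getDateRange() is used above but not defined in this excerpt. A minimal
# sketch, assuming a negative argument means "each of the past N days":
def getDateRange(days):
    from datetime import datetime, timedelta
    step = 1 if days >= 0 else -1
    for offset in range(0, days, step):
        # yields datetimes from today backwards when days is negative
        yield datetime.now() + timedelta(days=offset)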