Example #1
def populateMetadata():
    import pysharkbite
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")

    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)

    indexTableOps = connector.tableOps("DatawaveMetadata")

    auths = pysharkbite.Authorizations()

    indexScanner = indexTableOps.createScanner(auths, 100)
    indexrange = pysharkbite.Range()

    indexScanner.addRange(indexrange)
    indexScanner.fetchColumn("f", "")

    combinertxt = ""
    ## load the combiner from the file system and send it to accumulo
    with open('countgatherer.py', 'r') as file:
        combinertxt = file.read()
    combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt, 200)
    indexScanner.addIterator(combiner)
    indexSet = indexScanner.getResultSet()
    import json
    counts = 0
    mapping = {}
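    # "f" column entries carry a qualifier of the form "datatype\x00yyyyMMdd";
    # sum the counts per day for each row (field name) of the metadata table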
    for indexKeyValue in indexSet:
        value = indexKeyValue.getValue()
        key = indexKeyValue.getKey()
        if key.getColumnFamily() == "f":
            day = key.getColumnQualifier().split("\u0000")[1]
            dt = key.getColumnQualifier().split("\u0000")[0]
            if day in mapping:
                if key.getRow() in mapping[day]:
                    try:
                        mapping[day][key.getRow()] += int(value.get())
                    except:
                        pass
                else:
                    try:
                        mapping[day][key.getRow()] = int(value.get())
                    except:
                        pass
            else:
                mapping[day] = {}
                try:
                    mapping[day][key.getRow()] = int(value.get())
                except:
                    pass
    caches['metadata'].set("field", json.dumps(mapping), 3600 * 48)
    return json.dumps(mapping)
Example #2
    def inity(self, replace=False):
        import pysharkbite
        self._zk = pysharkbite.ZookeeperInstance(self._instance, self._zks,
                                                 1000, self._conf)
        self._user = pysharkbite.AuthInfo(self._username, self._password,
                                          self._zk.getInstanceId())
        if replace:
            self._connector = pysharkbite.AccumuloConnector(
                self._user, self._zk)
            self._tableOperations = self._connector.tableOps(self._table)
            self._securityOperations = self._connector.securityOps()
Example #3
def get_uploads():
    model = apps.get_model(app_label='query', model_name='FileUpload')
    objs = model.objects.filter(status="NEW")
    haveNew = False
    for obj in objs:
        if obj.status == "NEW":
            caches['eventcount'].set("ingestcomplete", 95)
            haveNew = True
    if not haveNew:
        caches['eventcount'].set("ingestcomplete", 100)
        return
    import pysharkbite
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                       accumulo_cluster.zookeeper, 1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zk)
    auths = pysharkbite.Authorizations()
    auths.addAuthorization("PROV")
    indexTableOps = connector.tableOps("provenanceIndex")
    indexScanner = indexTableOps.createScanner(auths, 10)
    indexrange = pysharkbite.Range()
    indexScanner.addRange(indexrange)
    indexScanner.fetchColumn("CONTENTURI", "")
    indexScanner.fetchColumn("TRANSITURI", "")
    indexSet = indexScanner.getResultSet()
    count = 0
    usercount = 0
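    # tally provenance index entries: CONTENTURI entries count ingested content,
    # the remaining (TRANSITURI) entries count user uploads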
    try:
        for indexKeyValue in indexSet:
            if indexKeyValue.getKey().getColumnFamily() == "CONTENTURI":
                count = count + 1
            else:
                usercount = usercount + 1
        if count > 0:
            caches['eventcount'].set("ingestcount", count, 3600 * 48)
        if usercount > 0:
            caches['eventcount'].set("useruploads", usercount, 3600 * 48)
        indexScanner.close()
    except:
        pass  ## user does not have PROV
Example #4
table = args.table

if not password:
    print("Please enter your password")
    password = input()
    
if not table:
    table = "blahblahd"

import pysharkbite

conf = pysharkbite.Configuration()

conf.set ("FILE_SYSTEM_ROOT", "/accumulo");

zk = pysharkbite.ZookeeperInstance(args.instance, args.zookeepers, 1000, conf)

user = pysharkbite.AuthInfo(args.username, password, zk.getInstanceId()) 

try:
    connector = pysharkbite.AccumuloConnector(user, zk)


    tableOperations = connector.tableOps(table)

    if not tableOperations.exists(False):
        print("Creating table " + table)
        tableOperations.create(False)
    else:
        print(table + " already exists, so not creating it")
except RuntimeError as e:
    # the original snippet is truncated here; a minimal handler is assumed to complete the try block
    print("Failed to connect or create the table: " + str(e))
Example #5
    def mthd(self):

        import pysharkbite
        import sys
        import time

        tableOperations = super().getTableOperations()

        ## validate that table is removed
        try:
            if not tableOperations.exists(False):
                print("Creating table")
                if not tableOperations.create(False):
                    print("Could not create table")
            else:
                print("Table already exists, so not creating it")

            auths = pysharkbite.Authorizations()
            # add authorizations here if needed
            # example with a column visibility: mutation.put("cf", "cq", "cv", 1569786960)

            tableOperations.remove()

            time.sleep(1)

            writer = tableOperations.createWriter(auths, 10)
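            # the table was removed above, so the writes below are expected to fail;
            # reaching the sys.exit(1) call means the removal did not take effect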

            mutation = pysharkbite.Mutation("sow2")

            mutation.put("cf", "cq", "", 1569786960, "value")
            mutation.put("cf2", "cq2", "", 1569786960, "value2")
            """ no value """
            mutation.put("cf3", "cq3", "", 1569786960, "")

            writer.addMutation(mutation)

            writer.close()

            writer = tableOperations.createWriter(auths, 10)

            rng = range(0, 1000)
            for i in rng:
                row = ("row%i" % (i + 5))
                mutation = pysharkbite.Mutation(row)
                mutation.put("cf", "cq", "", 1569786960, "value")
                writer.addMutation(mutation)

            writer.close()

            print("Table not removed")
            sys.exit(1)
        except (RuntimeError, TypeError, NameError):
            print("Table successfully removed")
            # this is okay

        try:
            testzk = pysharkbite.ZookeeperInstance(None, None, 1000, None)
            print("Table not removed")
            sys.exit(1)
        except (RuntimeError, TypeError, NameError):
            print("Caught expected error")
            # this is okay

        try:
            writer = tableOperations.createWriter(None, 10)
            print("Expected error passing None")
            sys.exit(1)
        except (RuntimeError, TypeError, NameError):
            print("Caught expected error")
            # this is okay

        tableOperations.remove()
Example #6
    def __init__(self):
        cluster = AccumuloCluster.objects.first()
        self.zk = pysharkbite.ZookeeperInstance(cluster.instance,
                                                cluster.zookeeper, 1000, conf)
Example #7
    def __init__(self):
        parser = ArgumentParser(
            description="This is an Apache Accumulo Python connector")

        parser.add_argument("-i",
                            "--instance",
                            dest="instance",
                            help="Apache Accumulo Instance Name",
                            required=True)
        parser.add_argument("-z",
                            "--zookeepers",
                            dest="zookeepers",
                            help="Comma Separated Zookeeper List",
                            required=True)
        parser.add_argument("-u",
                            "--username",
                            dest="username",
                            help="User to access Apache Accumulo",
                            required=True)
        parser.add_argument(
            "-p",
            "--password",
            dest="password",
            help="Password to access Apache Accumulo. May also be entered at the prompt if omitted")
        parser.add_argument("-t",
                            "--table",
                            dest="table",
                            help="Table to create/update")
        parser.add_argument("-s",
                            "--solocation",
                            dest="shardobject",
                            help="Shared object Location")
        args = parser.parse_args()

        instance = args.instance
        zookeepers = args.zookeepers
        password = args.password
        table = args.table
        dll = args.shardobject

        print("Opening ", dll)
        py = cdll.LoadLibrary(dll)

        import pysharkbite

        self._conf = pysharkbite.Configuration()

        self._conf.set("FILE_SYSTEM_ROOT", "/accumulo")

        self._zk = pysharkbite.ZookeeperInstance(args.instance,
                                                 args.zookeepers, 1000,
                                                 self._conf)

        self._user = pysharkbite.AuthInfo(args.username, password,
                                          self._zk.getInstanceId())

        self._connector = pysharkbite.AccumuloConnector(self._user, self._zk)

        self._tableOperations = self._connector.tableOps(table)

        print("created connector")
Example #8
def run(*args):
    admin_user = "******"
    admin_password = "******"
    auth = Auth()
    auth.auth = "TST"
    auth.save()
    democonfig = IngestConfiguration()
    democonfig.name = "democonfig"
    democonfig.use_provenance = True
    democonfig.post_location = "http://nifi:8181/contentListener"
    democonfig.save()

    instance = "uno"
    zookeepers = "192.168.1.88:2181"
    acc_user = "******"
    acc_pass = "******"
    for arg in args:
        if arg.startswith("password"):
            split = arg.split("=")
            if len(split) == 2:
                admin_password = split[1]
        if arg.startswith("username"):
            split = arg.split("=")
            if len(split) == 2:
                admin_user = split[1]
        if arg.startswith("instance"):
            split = arg.split("=")
            if len(split) == 2:
                admin_user = split[1]
        if arg.startswith("zookeepers"):
            split = arg.split("=")
            if len(split) == 2:
                zookeepers = split[1]
        if arg.startswith("accuser"):
            split = arg.split("=")
            if len(split) == 2:
                acc_user = split[1]
        if arg.startswith("accpass"):
            split = arg.split("=")
            if len(split) == 2:
                acc_pass = split[1]
    acc = AccumuloCluster()
    acc.instance = instance
    acc.zookeeper = zookeepers
    acc.user = acc_user
    acc.password = acc_pass
    acc.save()
    import pysharkbite
    while True:
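        # keep retrying until the Accumulo instance is registered in ZooKeeper
        # (see the "Instance Id does not exist" check below)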
        try:
            conf = pysharkbite.Configuration()
            conf.set("FILE_SYSTEM_ROOT", "/accumulo")
            zoo_keeper = pysharkbite.ZookeeperInstance(instance, zookeepers,
                                                       1000, conf)
            user = pysharkbite.AuthInfo(acc_user, acc_pass,
                                        zoo_keeper.getInstanceId())
            connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
            security_ops = connector.securityOps()
            auths = pysharkbite.Authorizations()
            auths.addAuthorization("MTRCS")
            auths.addAuthorization("PROV")
            security_ops.grantAuthorizations(auths, acc_user)
            table_ops = connector.tableOps("shard")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("shardIndex")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("shardReverse")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("graph")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("provenance")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("provenanceIndex")
            if not table_ops.exists(False):
                table_ops.create(False)
            table_ops = connector.tableOps("provenanceReverseIndex")
            if not table_ops.exists(False):
                table_ops.create(False)
            break
        except RuntimeError as e:
            # retry while the instance id is not yet registered; give up on any other error
            if "Instance Id does not exist" not in str(e):
                break
Example #9
def populateEventCountMetadata():
    import pysharkbite
    import time
    conf = pysharkbite.Configuration()
    conf.set("FILE_SYSTEM_ROOT", "/accumulo")
    model = apps.get_model(app_label='query', model_name='AccumuloCluster')
    accumulo_cluster = model.objects.first()
    if accumulo_cluster is None:
        return
    zoo_keeper = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
    user = pysharkbite.AuthInfo("root", "secret", zoo_keeper.getInstanceId())
    connector = pysharkbite.AccumuloConnector(user, zoo_keeper)
    queryRanges = list()
    # last 15 days
    for dateinrange in getDateRange(-15):
        shardbegin = dateinrange.strftime("%Y%m%d")
        cached = caches['eventcount'].get(shardbegin)
        if cached is None or cached == 0:
            queryRanges.append(shardbegin)
        # days with a cached count are skipped

    if len(queryRanges) > 0:
        ## some days are missing from the cache, so query the metrics table for them

        user = pysharkbite.AuthInfo("root", "secret",
                                    zoo_keeper.getInstanceId())
        connector = pysharkbite.AccumuloConnector(user, zoo_keeper)

        indexTableOps = connector.tableOps("DatawaveMetrics")

        auths = pysharkbite.Authorizations()
        auths.addAuthorization("MTRCS")

        indexScanner = indexTableOps.createScanner(auths, 100)
        start = time.time()
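        # add one range per uncached day; "\uffff" extends the end key past every
        # row that starts with that date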
        for dt in queryRanges:
            indexrange = pysharkbite.Range(dt, True, dt + "\uffff", False)
            indexScanner.addRange(indexrange)
        indexScanner.fetchColumn("EVENT_COUNT", "")

        combinertxt = ""
        ## load the combiner from the file system and send it to accumulo
        with open('metricscombiner.py', 'r') as file:
            combinertxt = file.read()
        combiner = pysharkbite.PythonIterator("MetadataCounter", combinertxt,
                                              200)
        indexScanner.addIterator(combiner)
        indexSet = indexScanner.getResultSet()

        counts = 0
        mapping = {}
        try:
            for indexKeyValue in indexSet:
                value = indexKeyValue.getValue()
                key = indexKeyValue.getKey()
                if key.getColumnFamily() == "EVENT_COUNT":
                    dt = key.getRow().split("_")[0]
                    if dt in mapping:
                        mapping[dt] += int(value.get())
                    else:
                        mapping[dt] = int(value.get())
            for field in mapping:
                caches['eventcount'].set(field, str(mapping[field]), 3600 * 48)
        except:
            pass
Example #10
    def __init__(self):
        import pysharkbite
        model = apps.get_model(app_label='query', model_name='AccumuloCluster')
        accumulo_cluster = model.objects.first()
        self.zoo_keeper = pysharkbite.ZookeeperInstance(
            accumulo_cluster.instance, accumulo_cluster.zookeeper, 1000, conf)
Example #11
def check():
    model = apps.get_model(app_label='query', model_name='FileUpload')
    objs = model.objects.filter(status="NEW")
    for obj in objs:
        if obj.status == "NEW":
            import pysharkbite
            conf = pysharkbite.Configuration()
            conf.set("FILE_SYSTEM_ROOT", "/accumulo")
            model = apps.get_model(app_label='query',
                                   model_name='AccumuloCluster')
            accumulo_cluster = model.objects.first()
            print("Checking " + str(obj.uuid))
            if accumulo_cluster is None:
                return
            print("Checking " + str(obj.uuid))
            zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                               accumulo_cluster.zookeeper,
                                               1000, conf)
            user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
            connector = pysharkbite.AccumuloConnector(user, zk)

            indexTableOps = connector.tableOps("provenanceIndex")

            auths = pysharkbite.Authorizations()
            auths.addAuthorization("PROV")

            indexScanner = indexTableOps.createScanner(auths, 2)

            indexrange = pysharkbite.Range(str(obj.uuid))

            indexScanner.addRange(indexrange)
            indexSet = indexScanner.getResultSet()

            rangelist = list()
            provops = connector.tableOps("provenance")
            scanner = provops.createScanner(auths, 10)
            for indexKeyValue in indexSet:
                value = indexKeyValue.getValue()
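                # the index value is a protobuf Uid.List; each UID, combined with the
                # shard and datatype from the column qualifier, locates an event in the
                # provenance table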
                protobuf = Uid_pb2.List()
                protobuf.ParseFromString(value.get().encode())
                for uidvalue in protobuf.UID:
                    qualifier = indexKeyValue.getKey().getColumnQualifier().split("\u0000")
                    shard = qualifier[0]
                    datatype = qualifier[1]
                    startKey = pysharkbite.Key()
                    stopKey = pysharkbite.Key()
                    startKey.setRow(shard)
                    stopKey.setRow(shard)
                    startKey.setColumnFamily(datatype + "\x00" + uidvalue)
                    stopKey.setColumnFamily(datatype + "\x00" + uidvalue +
                                            "\xff")
                    rangelist.append(
                        pysharkbite.Range(startKey, True, stopKey, False))
                    scanner = provops.createScanner(auths, 10)
                    scanner.addRange(
                        pysharkbite.Range(startKey, True, stopKey, False))
                    resultset = scanner.getResultSet()
                    for keyvalue in resultset:
                        key = keyvalue.getKey()
                        value = keyvalue.getValue()
                        eventid = key.getColumnFamily().split("\u0000")[1]
                        fieldname = key.getColumnQualifier().split("\u0000")[0]
                        fieldvalue = key.getColumnQualifier().split(
                            "\u0000")[1]
                        if (fieldname == "EVENTTYPE"):
                            if fieldvalue == "DROP":
                                obj.status = "COMPLETE"
                                obj.save()
                                break
                    scanner.close()

            indexScanner.close()
Example #12
def run_edge_query(query_id):
    model = apps.get_model(app_label='query', model_name='EdgeQuery')
    objs = model.objects.filter(query_id=query_id)
    for obj in objs:
        obj.running = True
        obj.save()
        import pysharkbite
        conf = pysharkbite.Configuration()
        conf.set("FILE_SYSTEM_ROOT", "/accumulo")
        model = apps.get_model(app_label='query', model_name='AccumuloCluster')
        accumulo_cluster = model.objects.first()
        if accumulo_cluster is None:
            return
        zk = pysharkbite.ZookeeperInstance(accumulo_cluster.instance,
                                           accumulo_cluster.zookeeper, 1000,
                                           conf)
        user = pysharkbite.AuthInfo("root", "secret", zk.getInstanceId())
        connector = pysharkbite.AccumuloConnector(user, zk)
        auths = pysharkbite.Authorizations()
        if obj.auths:
            for auth in obj.auths.split(","):
                auths.addAuthorization(auth)

        sres_model = apps.get_model(app_label='query', model_name='ScanResult')
        res_model = apps.get_model(app_label='query', model_name='Result')
        sr = sres_model.objects.filter(query_id=obj.query_id).first()
        if not sr:
            print("No scan result, returning")
            return
        print("here")

        graphTableOps = connector.tableOps("graph")
        scanner = graphTableOps.createScanner(auths, 10)
        scan_range = pysharkbite.Range(obj.query, True,
                                       obj.query + "\uffff" + "\uffff",
                                       False)  ## for now the range should be this
        scanner.addRange(scan_range)
        resultset = scanner.getResultSet()
        count = 0
        try:
            for indexKeyValue in resultset:
                value = "0"
                ## row will be the to
                ## direction will be the cf
                to_value = ""
                direction = "one"
                try:
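                    # edge values are EdgeValue protobufs; the row is "from\x00to" and the
                    # column family encodes the edge direction after the "/"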
                    protobuf = EdgeData_pb2.EdgeValue()
                    protobuf.ParseFromString(
                        indexKeyValue.getValue().get_bytes())
                    value = str(protobuf.count) + "/" + protobuf.uuid_string
                    to_value = indexKeyValue.getKey().getRow().split("\u0000")[1]
                    direction = indexKeyValue.getKey().getColumnFamily().split("/")[1]
                    direction_split = direction.split("-")
                    if len(direction_split) != 2 or direction_split[0] == direction_split[1]:
                        continue

                except Exception as e:
                    print(e)
                    continue
                scanresult = res_model.objects.create(
                    scanResult=sr,
                    value=value,
                    row=to_value,
                    cf=direction,
                    cq=indexKeyValue.getKey().getColumnQualifier())
                scanresult.save()
                count = count + 1
                if count > 1000:
                    break
            sr.is_finished = True
            sr.save()
            scanner.close()
        except Exception as e:
            print(e)
        obj.running = False
        obj.finished = True
        obj.save()