Example #1
    def __init__(self, k, wipe_segs=False):
        self.total_edges_inserted = 0
        self.k = k  # max nodes per segment
        self.consumer = kafka.KafkaConsumer('se')
        self.producer = kafka.KafkaProducer()
        self.gremlin = gremlin_query.Runner()
        self.next_seg_id = 1 + self.max_seg_id()
        self.next_node_id = 1 + self.max_node_id()
        if wipe_segs:
            self.drop_all_existing_segments()
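A minimal construction sketch, assuming the enclosing class is named SegmentBuilder (a hypothetical name, not shown in the snippet) and that the kafka and gremlin_query modules are importable:

# Hypothetical usage of the constructor above; 'SegmentBuilder' is an assumed class name.
builder = SegmentBuilder(k=100, wipe_segs=True)  # cap segments at 100 nodes and drop any existing segments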
Example #2
def drop_all():
    # Even for very small transactions,
    # like "g.V().order().limit(20).drop().iterate()",
    # this sometimes reports GremlinServerError: Code [597]: SCRIPT_EVALUATION.
    #   The vertex or type has been removed [v[1831104]]
    # in which case verify no background tasks are inserting,
    # and if necessary then: stop, /opt/titan/bin/titan.sh clean, start.
    with gremlin_query.Runner() as gremlin:
        for x in ['V().has(\'segment:name\',within(\'byPID\',\'byTime\')).has(label,\'Segment\')']:
            for q in ['g.%s.drop().iterate()  ' % x,
                      'graph.tx().commit()     ']:
                print(q, gremlin.fetch(q))
Example #3
def get_label_counts(with_edges=False):
    '''Queries titan with read throughput of ~2700 node/sec.'''
    queries = ['g.V().groupCount().by(label())']
    if with_edges:
        queries.append('g.E().groupCount().by(label())')

    cnt = {}
    with gremlin_query.Runner() as gremlin:
        for query in queries:
            for msg in gremlin.fetch(query):
                if msg.data:
                    assert len(msg.data) == 1
                    cnt.update(msg.data[0])

    cnt['total'] = sum(cnt.values())

    return sorted(['%6d  %s' % (cnt[k], k) for k in cnt.keys()])
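A short usage sketch for the helper above; it returns pre-formatted count lines that can be printed directly:

# Print one '<count>  <label>' line per node label (and per edge label if requested).
for line in get_label_counts(with_edges=True):
    print(line)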
Example #4
def get_eventType_counts():
    queries = ["g.V().has('eventType').groupCount().by('eventType')"]

    cnt = {}
    with gremlin_query.Runner() as gremlin:
        for query in queries:
            for msg in gremlin.fetch(query):
                if msg.data:
                    assert len(msg.data) == 1
                    cnt.update(msg.data[0])

    cnt['total'] = sum(cnt.values())

    return sorted([
        '%6d  %s' %
        (cnt[k], k if k == 'total' else cdm.enums.Event(int(k)).name)
        for k in cnt.keys()
    ])
Example #5
def renderSegments(verbose=False):
    with gremlin_query.Runner() as gremlin:

        vertices = gremlin_properties.fetch(gremlin, QUERYV)
        graph = {}
        for v in vertices:
            val = {}
            val['criteria'] = v['pid']
            val['name'] = v['segment:name']

            edges = gremlin_properties.fetch(gremlin, QUERYE.format(v.getId()))
            val['edges_out'] = [e.getId() for e in edges]

            graph[v.getId()] = val

    dot = toDot(graph)

    if verbose:
        print(dot)

    dot.format = 'svg'
    dot.render('static/seggraph.dot', view=False)
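QUERYV, QUERYE, toDot and gremlin_properties are names this snippet assumes are defined elsewhere in the module, with toDot presumably returning a graphviz.Digraph. A minimal invocation sketch under those assumptions:

# Render the segment graph; with format='svg', graphviz writes the image next to the .dot source.
renderSegments(verbose=True)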
Example #6
import datetime
import os
import sys

sys.path.append(os.path.expanduser('~/adapt/tools'))
import gremlin_query


def report(gremlin):

    stamps = []

    # Dates before 1971 fail the sanity check and are rejected.
    sane = 365 * 86400 * 1e6

    queries = [
        "g.V().values('startedAtTime').is(gt(%d)).min()" % sane,
        "g.V().values('startedAtTime').max()",
    ]

    for query in queries:
        for msg in gremlin.fetch(query):
            if msg.data:
                for usec in msg.data:
                    stamp = datetime.datetime.utcfromtimestamp(usec / 1e6)
                    print(stamp)
                    stamps.append(stamp)
    delta = stamps[1] - stamps[0]
    print(delta, 'elapsed time')


if __name__ == '__main__':
    with gremlin_query.Runner() as gremlin:
        report(gremlin)
Example #7
def execute_graph_query(query):
    with gremlin_query.Runner() as g:
        return str(g.fetch(query))
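A one-line usage sketch for the wrapper above; the traversal shown is only an illustrative query string:

# Fetch the total vertex count; any Gremlin query string can be passed here.
print(execute_graph_query('g.V().count()'))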
Example #8
    def compute_view_and_save(self):
        # extract features
        keys = sorted(self.features_queries.keys())
        QUERY = self.node_ids_query + ";if(IDS!=[]){"
        for i in range(0, len(keys)):
            if isinstance(self.features_queries[keys[i]], dict):
                QUERY += self.split_Q_to_many(
                    "yf{}".format(i), self.features_queries[keys[i]]['first'])
                if 'second' in self.features_queries[keys[i]].keys():
                    QUERY += self.split_Q_to_many(
                        "ys{}".format(i),
                        self.features_queries[keys[i]]['second'])
                if 'third' in self.features_queries[keys[i]].keys():
                    QUERY += self.split_Q_to_many(
                        "yt{}".format(i),
                        self.features_queries[keys[i]]['third'])
            else:
                QUERY += self.split_Q_to_many("x{}".format(i),
                                              self.features_queries[keys[i]])
        QUERY += "[IDS"
        for i in range(0, len(keys)):
            if isinstance(self.features_queries[keys[i]], dict):
                QUERY += ",[yf{}".format(i)
                if 'second' in self.features_queries[keys[i]].keys():
                    QUERY += ",ys{}".format(i)
                if 'third' in self.features_queries[keys[i]].keys():
                    QUERY += ",yt{}".format(i)
                QUERY += "]"
            else:
                QUERY += ",x{}".format(i)
        QUERY += "]}else [];"
        #        print(QUERY)
        #        if len(QUERY) > 0:
        #            return False
        log.info("Extracting features for " + self.view_type + "...")
        with gremlin_query.Runner() as gremlin:
            try:
                # 'bindings' is assumed to be defined elsewhere in the original module; it is not shown in this snippet.
                result = gremlin.fetch_data(QUERY, bindings=bindings)
            except Exception:
                log.exception("Exception at query:" + QUERY)
                return False

        if result == []:
            log.info("Found 0 " + self.view_type + " nodes")
            return False

        log.info("Writing " + self.view_type + " view features to file: " +
                 self.feature_file)
        f = open(self.feature_file, "w")
        f.write("id")
        for k in keys:
            f.write("," + k)
        f.write("\n")
        for i in range(0, len(result[0])):
            f.write(str(result[0][i]))
            j = 1
            for k in keys:
                res = 0
                try:
                    if isinstance(self.features_queries[k], dict):
                        op = self.features_queries[k]['operator']
                        if op == 'subTime':
                            res = (result[j][0][i] - result[j][1][i]) / 1.0e6
                        elif op == 'RELUTime':
                            res = (result[j][0][i] - result[j][1][i]) / 1.0e6
                            if res < 0:
                                res = 0
                        elif op == 'div(SubTime)':
                            res = result[j][0][i] / (
                                (result[j][1][i] - result[j][2][i]) / 1.0e6)
                        elif op == '(SubTime)div':
                            res = ((result[j][0][i] - result[j][1][i]) /
                                   1.0e6) / result[j][2][i]
                        else:
                            log.info("Unrecognized operator: " + op)
                    else:
                        res = result[j][i]
                except Exception:
                    log.exception("Exception: i=" + str(i) + ", j=" + str(j) +
                                  ", k=" + k)
                f.write(',' + str(res))
                j += 1
            f.write('\n')
        f.close()
        log.info("Writing " + self.feature_file + " Finished")
        return True
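The method above expects self.features_queries to map each feature name either to a single Gremlin query string or to a dict carrying an 'operator' (one of 'subTime', 'RELUTime', 'div(SubTime)', '(SubTime)div') plus 'first'/'second' and optionally 'third' sub-queries. A small sketch of that shape, with made-up feature names and query text:

# Illustrative only: the real queries live in the view configuration, which is not shown here.
features_queries = {
    'numEvents': "out().count()",   # plain query -> one value per node
    'duration': {                   # derived feature: subTime yields (first - second) / 1.0e6
        'operator': 'subTime',
        'first': "values('endedAtTime')",
        'second': "values('startedAtTime')",
    },
}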
Example #9
    def attach_scores_to_db(self, view_stats, percentage=5.0):
        # Count the rows in the score file; enumerate is zero-based, so the final
        # index equals the number of data rows when the CSV has a header line.
        with open(self.score_file) as f:
            for i, l in enumerate(f):
                pass
        total_nodes = i
        view_stats.number_nodes = total_nodes
        cutoff = min(math.ceil(total_nodes * (percentage / 100.0)), 500)
        view_stats.number_nodes_attached = cutoff
        max_score = 0
        max_id = 0
        max_feature = None
        cnt = 0
        QUERY = ""
        binds = {
            'atype': 'anomalyType',
            'ascore': 'anomalyScore',
            'sin': 'segment:includes',
            'afeature': 'anomalyFeature'
        }
        with gremlin_query.Runner() as gremlin:
            with open(self.score_file, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    QUERY += "x={id};t='{type}';s={score};g.V(x).property(atype,t).next();g.V(x).property(ascore,s).next();".format(
                        id=row['id'],
                        type=self.view_type,
                        score=row['anomaly_score'])
                    feature = "["
                    for k in sorted(row.keys()):
                        if k != 'id' and k != 'anomaly_score':
                            if feature != "[":
                                feature += ","
                            feature += k + ":" + str(row[k])
                    feature += "]"
                    feature = "Rnk:" + str(cnt + 1) + "/" + str(
                        total_nodes) + feature
                    QUERY += "f='{feat}';g.V(x).property(afeature,f).next();".format(
                        feat=feature)
                    log.info("Adding anomaly scores to id " + row['id'] +
                             " (" + self.view_type + ", " +
                             row['anomaly_score'] + ")")
                    if float(row['anomaly_score']) > max_score:
                        max_score = float(row['anomaly_score'])
                        max_id = row['id']
                        max_feature = feature
                    cnt = cnt + 1
                    if cnt >= cutoff:
                        break
                    if len(QUERY) > 20000:
                        log.info("size of QUERY = " + str(len(QUERY)))
                        try:
                            log.info('Attaching anomaly score for ' +
                                     str(cnt) + ' nodes')
                            gremlin.fetch_data(QUERY, binds)
                            log.info('Anomaly score attachment done for ' +
                                     str(cnt) + ' nodes')
                        except Exception:
                            log.exception("Exception at query:" + QUERY)
                        QUERY = ""

                QUERY += "x={id};t='{type}';s={score};f='{feat}';IDS=g.V(x).in(sin).id().toList().toArray();if(IDS!=[]){{g.V(IDS).property(atype,t).next();g.V(IDS).property(ascore,s).next();g.V(IDS).property(afeature,f).next();}};".format(
                    id=max_id,
                    type=self.view_type,
                    score=max_score,
                    feat=max_feature)
                log.info("size of QUERY = " + str(len(QUERY)))
                #            log.info("Attaching anomaly scores to top " + str(cutoff) + " anomalous nodes (threshold=min(" + str(percentage) + "%,1000))...")
                try:
                    gremlin.fetch_data(QUERY, binds)
                    log.info('Anomaly score attachment done for view ' +
                             self.view_type)
                except Exception:
                    log.exception("Exception at query:" + QUERY)