Ejemplo n.º 1
0
def suspicious_queries(date, ip=None, query=None, limit=250):
    """Return scored DNS queries for a day that have no investigation entry.

    Rows come from ``dns_scores`` left-joined to ``dns_threat_investigation``
    on ``dns_qry_name``; only rows without a match are kept.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``; selects the
            ``y``/``m``/``d`` partition.
        ip: Optional exact match on ``ds.ip_dst``.
        query: Optional substring matched against ``ds.dns_qry_name``.
        limit: Row limit, applied after ordering by ``ds.ml_score``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.

    NOTE(review): all values are interpolated into the SQL text as-is, so
    callers are expected to pass trusted input.
    """
    base_sql = """
            SELECT STRAIGHT_JOIN
                ds.unix_tstamp,frame_len,ds.ip_dst,ds.dns_qry_name,
                dns_qry_class,dns_qry_type,dns_qry_rcode,ml_score,tld,
                query_rep,hh,dns_qry_class_name,dns_qry_type_name,
                dns_qry_rcode_name,network_context
            FROM {0}.dns_scores ds
            LEFT JOIN {0}.dns_threat_investigation dt
                ON  (ds.dns_qry_name = dt.dns_qry_name)
            WHERE
                ds.y={1} AND ds.m={2} AND ds.d={3}
                AND (dt.dns_qry_name is NULL)
            """.format(Configuration.db(), date.year, date.month, date.day)

    # Optional predicates first, then the mandatory ordering/limit tail.
    clauses = []
    if ip:
        clauses.append(" AND ds.ip_dst = '{0}'".format(ip))
    if query:
        clauses.append(" AND ds.dns_qry_name LIKE '%{0}%'".format(query))
    clauses.append(" ORDER BY ds.ml_score limit {0}".format(limit))

    return ImpalaEngine.execute_query_as_list(base_sql + "".join(clauses))
Ejemplo n.º 2
0
def suspicious_requests(date, uri=None, ip=None, limit=250):
    """Return scored proxy requests for a day that have no investigation entry.

    Rows come from ``proxy_scores`` left-joined to
    ``proxy_threat_investigation`` on ``fulluri``; only unmatched rows are
    kept.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Optional substring matched against ``ps.fulluri``.
        ip: Optional exact match on ``ps.clientip``.
        limit: Row limit, applied after ordering by ``ps.ml_score``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.

    NOTE(review): values are interpolated into the SQL text unescaped.
    """
    base_sql = """
	SELECT STRAIGHT_JOIN
	    ps.tdate,ps.time,ps.clientip,ps.host,ps.reqmethod,ps.useragent,
        ps.resconttype,ps.duration,ps.username,ps.webcat,ps.referer,
        ps.respcode,ps.uriport,ps.uripath,ps.uriquery,ps.serverip,ps.scbytes,
        ps.csbytes,ps.fulluri,ps.ml_score,ps.uri_rep,ps.respcode_name,
        ps.network_context
	FROM
	    {0}.proxy_scores ps
	LEFT JOIN
	    {0}.proxy_threat_investigation pt
	    ON (ps.fulluri = pt.fulluri)
	WHERE
	    ps.y={1} AND ps.m={2} AND ps.d={3}
	    AND (pt.fulluri is NULL)
    """.format(Configuration.db(), date.year, date.month, date.day)

    # Optional predicates first, then the mandatory ordering/limit tail.
    clauses = []
    if uri:
        clauses.append(" AND ps.fulluri LIKE '%{0}%'".format(uri))
    if ip:
        clauses.append(" AND ps.clientip = '{0}'".format(ip))
    clauses.append(" ORDER BY ps.ml_score limit {0}".format(limit))

    return ImpalaEngine.execute_query_as_list(base_sql + "".join(clauses))
Ejemplo n.º 3
0
def create_time_line(anchor, inbound, outbound, twoway, date):
    """Populate ``flow_timeline`` with flows between ``anchor`` and its peers.

    The peer list is the concatenation of the keys of ``twoway``,
    ``outbound`` and ``inbound`` (in that order). Matching rows of ``flow``
    for the given day are grouped and inserted into the day's
    ``flow_timeline`` partition, capped at 1000 rows.

    Args:
        anchor: IP stored as the first selected column and matched against
            ``sip``/``dip``.
        inbound, outbound, twoway: Sized mappings whose keys are peer IPs.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        A status message; which one depends on the truthiness of
        ``ImpalaEngine.execute_query``'s result.
    """
    peers = []
    for group in (twoway, outbound, inbound):
        if len(group) > 0:
            peers.extend(group.keys())

    db = Configuration.db()

    insert_template = """
        INSERT INTO TABLE {0}.flow_timeline PARTITION (y={4}, m={5},d={6})
        SELECT
            '{7}' ,min(treceived) as tstart, max(treceived) as tend,
            sip as srcIP,dip as dstip, proto as proto, sport as sport,
            dport AS dport, ipkt as ipkt, ibyt as ibyt
        FROM
            {0}.flow
        WHERE y={4} AND m={5} AND d={6}
        AND ((dip IN({1}) AND sip ='{2}') OR (sip IN({1}) AND dip ='{2}'))
        GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt
        ORDER BY tstart
        LIMIT {3}
    """

    # Quote every peer for the IN (...) list; an empty list yields ''.
    quoted_peers = "'" + "','".join(peers) + "'"
    insert_sql = insert_template.format(
        db, quoted_peers, anchor, 1000,
        date.year, date.month, date.day, anchor)

    created = ImpalaEngine.execute_query(insert_sql)
    if created:
        return "Timeline successfully created \n"
    return "Timeline couldn't be created \n"
Ejemplo n.º 4
0
def expanded_search(date, query=None, ip=None, limit=20):
    """Count DNS records for a day, pivoting on either an IP or a query name.

    When ``ip`` is given the rows are filtered by ``ip_dst`` and counted per
    ``dns_qry_name``; otherwise they are filtered by ``dns_qry_name`` and
    counted per ``ip_dst``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        query: DNS query name used when ``ip`` is falsy.
        ip: Client IP; takes precedence over ``query``.
        limit: Row limit; a falsy value falls back to 20.

    Returns:
        ``False`` when neither ``ip`` nor ``query`` is given, otherwise the
        result of ``ImpalaEngine.execute_query_as_list``.
    """
    if not (ip or query):
        return False

    db = Configuration.db()
    if ip:
        counted, column, value = "dns_qry_name", "ip_dst", ip
    else:
        counted, column, value = "ip_dst", "dns_qry_name", query

    template = """
   		SELECT
    		    COUNT({0}) as total,dns_qry_name,ip_dst
		FROM
		    {1}.dns
		WHERE y={2} AND m={3} AND d={4}
		AND {5} = '{6}'
		GROUP BY {0},{5}
		ORDER BY total DESC
		LIMIT {7}
    """
    sql = template.format(
        counted, db, date.year, date.month, date.day,
        column, value, limit or 20)

    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 5
0
def expanded_search(date,query=None,ip=None,limit=20):
    """Count DNS records for a day, pivoting on either an IP or a query name.

    When ``ip`` is given the rows are filtered by ``ip_dst`` and counted per
    ``dns_qry_name``; otherwise they are filtered by ``dns_qry_name`` and
    counted per ``ip_dst``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        query: DNS query name used when ``ip`` is falsy.
        ip: Client IP; takes precedence over ``query``.
        limit: Row limit; a falsy value falls back to 20.

    Returns:
        ``False`` when neither ``ip`` nor ``query`` is given, otherwise the
        result of ``ImpalaEngine.execute_query_as_list``.
    """
    if not ip and not query:
        return False

    db = Configuration.db()
    # Indentation below uses spaces only: mixing tabs and spaces inside the
    # same block is rejected by Python 3 with a TabError.
    if ip:
        count = "dns_qry_name"
        filter_param = "ip_dst"
        filter_value = ip
    else:
        count = "ip_dst"
        filter_param = "dns_qry_name"
        filter_value = query

    expanded_query = ("""
   		SELECT
    		    COUNT({0}) as total,dns_qry_name,ip_dst
		FROM
		    {1}.dns
		WHERE y={2} AND m={3} AND d={4}
		AND {5} = '{6}'
		GROUP BY {0},{5}
		ORDER BY total DESC
		LIMIT {7}
    """).format(count, db, date.year, date.month, date.day,
                filter_param, filter_value, limit if limit else 20)

    return ImpalaEngine.execute_query_as_list(expanded_query)
Ejemplo n.º 6
0
def create_time_line(anchor, inbound, outbound, twoway, date):
    """Insert the day's flows between ``anchor`` and its peers into ``flow_timeline``.

    Peers are the keys of ``twoway``, ``outbound`` and ``inbound``, collected
    in that order. The INSERT ... SELECT reads from ``flow`` for the given
    day and is limited to 1000 rows.

    Args:
        anchor: IP written as the first selected column and matched against
            ``sip``/``dip``.
        inbound, outbound, twoway: Sized mappings keyed by peer IP.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        One of two status strings, chosen by the truthiness of the value
        returned by ``ImpalaEngine.execute_query``.
    """
    peer_ips = []
    for mapping in (twoway, outbound, inbound):
        if len(mapping) > 0:
            peer_ips.extend(mapping.keys())

    db = Configuration.db()

    statement = """
        INSERT INTO TABLE {0}.flow_timeline PARTITION (y={4}, m={5},d={6})
        SELECT
            '{7}' ,min(treceived) as tstart, max(treceived) as tend,
            sip as srcIP,dip as dstip, proto as proto, sport as sport,
            dport AS dport, ipkt as ipkt, ibyt as ibyt
        FROM
            {0}.flow
        WHERE y={4} AND m={5} AND d={6}
        AND ((dip IN({1}) AND sip ='{2}') OR (sip IN({1}) AND dip ='{2}'))
        GROUP BY sip, dip, proto, sport, dport, ipkt, ibyt
        ORDER BY tstart
        LIMIT {3}
    """

    # Build the quoted, comma-separated list used inside IN (...).
    in_list = "'" + "','".join(peer_ips) + "'"
    statement = statement.format(
        db, in_list, anchor, 1000,
        date.year, date.month, date.day, anchor)

    succeeded = ImpalaEngine.execute_query(statement)
    return ("Timeline successfully created \n" if succeeded
            else "Timeline couldn't be created \n")
Ejemplo n.º 7
0
def suspicious_requests(date, uri=None, ip=None, limit=250):
    """Fetch the day's scored proxy requests that were not investigated yet.

    ``proxy_scores`` is left-joined to ``proxy_threat_investigation`` on
    ``fulluri`` and only rows without a match are returned.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Optional substring matched against ``ps.fulluri``.
        ip: Optional exact match on ``ps.clientip``.
        limit: Row limit, applied after ordering by ``ps.ml_score``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.

    NOTE(review): values are interpolated into the SQL text unescaped.
    """
    db = Configuration.db()
    select_part = """
	SELECT STRAIGHT_JOIN
	    ps.tdate,ps.time,ps.clientip,ps.host,ps.reqmethod,ps.useragent,
        ps.resconttype,ps.duration,ps.username,ps.webcat,ps.referer,
        ps.respcode,ps.uriport,ps.uripath,ps.uriquery,ps.serverip,ps.scbytes,
        ps.csbytes,ps.fulluri,ps.ml_score,ps.uri_rep,ps.respcode_name,
        ps.network_context
	FROM
	    {0}.proxy_scores ps
	LEFT JOIN
	    {0}.proxy_threat_investigation pt
	    ON (ps.fulluri = pt.fulluri)
	WHERE
	    ps.y={1} AND ps.m={2} AND ps.d={3}
	    AND (pt.fulluri is NULL)
    """.format(db, date.year, date.month, date.day)

    uri_clause = " AND ps.fulluri LIKE '%{0}%'".format(uri) if uri else ""
    ip_clause = " AND ps.clientip = '{0}'".format(ip) if ip else ""
    tail = " ORDER BY ps.ml_score limit {0}".format(limit)

    return ImpalaEngine.execute_query_as_list(
        select_part + uri_clause + ip_clause + tail)
Ejemplo n.º 8
0
def chord_details(ip, date):
    """Return the ``flow_chords`` rows stored for a threat IP on a given day.

    Args:
        ip: Value matched against the ``ip_threat`` column.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
                srcip,dstip,ibyt,ipkt
            FROM {0}.flow_chords
            WHERE  y={1} AND m={2} AND d={3} AND ip_threat='{4}'
            """
    sql = template.format(
        Configuration.db(), date.year, date.month, date.day, ip)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 9
0
def chord_details(ip, date):
    """Query ``flow_chords`` for one threat IP within a single day partition.

    Args:
        ip: Value compared with ``ip_threat``.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, ip)
    chords_sql = """
            SELECT
                srcip,dstip,ibyt,ipkt
            FROM {0}.flow_chords
            WHERE  y={1} AND m={2} AND d={3} AND ip_threat='{4}'
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(chords_sql)
Ejemplo n.º 10
0
def create_dendro(expanded_search, date, anchor):
    """Insert one ``dns_threat_dendro`` row per expanded-search result.

    Args:
        expanded_search: Iterable of rows indexable by ``"total"``,
            ``"dnsQuery"`` and ``"clientIp"``.
        date: Object exposing ``year``, ``month`` and ``day``; selects the
            target partition.
        anchor: Value stored in the first column of every inserted row.
    """
    insert_template = """
		INSERT INTO {0}.dns_threat_dendro PARTITION (y={1}, m={2},d={3})
		VALUES ( '{4}',{5},'{6}','{7}')
		"""

    db = Configuration.db()
    # One INSERT statement is issued per row.
    for entry in expanded_search:
        statement = insert_template.format(
            db, date.year, date.month, date.day, anchor,
            entry["total"], entry["dnsQuery"], entry["clientIp"])
        ImpalaEngine.execute_query(statement)
Ejemplo n.º 11
0
def create_dendro(expanded_search,date,anchor):
    """Insert one ``dns_threat_dendro`` row per expanded-search result.

    Args:
        expanded_search: Iterable of rows indexable by ``"total"``,
            ``"dnsQuery"`` and ``"clientIp"``.
        date: Object exposing ``year``, ``month`` and ``day``; selects the
            target partition.
        anchor: Value stored in the first column of every inserted row.
    """
    db = Configuration.db()
    # The loop body is indented with spaces only; a tab-indented body under a
    # space-indented ``for`` raises TabError on Python 3.
    for row in expanded_search:
        dendro_query = ("""
		INSERT INTO {0}.dns_threat_dendro PARTITION (y={1}, m={2},d={3})
		VALUES ( '{4}',{5},'{6}','{7}')
		""").format(db, date.year, date.month, date.day, anchor,
                    row["total"], row["dnsQuery"], row["clientIp"])

        ImpalaEngine.execute_query(dendro_query)
Ejemplo n.º 12
0
def get_scored_requests(date):
    """Return the proxy requests already scored for the given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list`` for the
        ``proxy_threat_investigation`` partition.
    """
    template = """
                SELECT
                    tdate,fulluri,uri_sev
                FROM
                    {0}.proxy_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """
    sql = template.format(Configuration.db(), date.year, date.month, date.day)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 13
0
def time_line(date, uri):
    """Return the ``proxy_timeline`` rows recorded for a threat URI on a day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value matched against the ``p_threat`` column.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
		p_threat,tstart,tend,duration,clientip,respcode,respcodename
            FROM {0}.proxy_timeline
            WHERE
                y={1} AND m={2} AND d={3}
                AND p_threat = '{4}'
            """
    sql = template.format(
        Configuration.db(), date.year, date.month, date.day, uri)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 14
0
def time_line(date, uri):
    """Query ``proxy_timeline`` for one threat URI within a day partition.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value compared with ``p_threat``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, uri)
    timeline_sql = """
            SELECT
		p_threat,tstart,tend,duration,clientip,respcode,respcodename
            FROM {0}.proxy_timeline
            WHERE
                y={1} AND m={2} AND d={3}
                AND p_threat = '{4}'
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(timeline_sql)
Ejemplo n.º 15
0
def get_scored_requests(date):
    """Read the day's partition of ``proxy_threat_investigation``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    scored_sql = """
                SELECT
                    tdate,fulluri,uri_sev
                FROM
                    {0}.proxy_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """.format(database, date.year, date.month, date.day)

    return ImpalaEngine.execute_query_as_list(scored_sql)
Ejemplo n.º 16
0
def get_scored_connections(date):
    """Return the DNS connections already scored for the given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list`` for the
        ``dns_threat_investigation`` partition.
    """
    template = """
                SELECT
                    unix_tstamp,ip_dst,dns_qry_name,ip_sev,dns_sev
                FROM
                    {0}.dns_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """
    sql = template.format(Configuration.db(), date.year, date.month, date.day)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 17
0
def get_scored_connections(date):
    """Read the day's partition of ``dns_threat_investigation``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    scored_sql = """
                SELECT
                    unix_tstamp,ip_dst,dns_qry_name,ip_sev,dns_sev
                FROM
                    {0}.dns_threat_investigation
                WHERE
                    y={1} AND m={2} AND d={3}
                """.format(database, date.year, date.month, date.day)

    return ImpalaEngine.execute_query_as_list(scored_sql)
Ejemplo n.º 18
0
def client_details(date, ip):
    """Return the ``dns_dendro`` rows of one client IP for a given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Value matched against ``ip_dst``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.

    NOTE(review): ``ip_dst`` appears twice in the select list.
    """
    template = """
            SELECT
                ip_dst,dns_a,dns_qry_name,ip_dst
            FROM
                {0}.dns_dendro
            WHERE
                y={1} AND m={2} AND d={3}
                AND ip_dst='{4}'
            """
    sql = template.format(
        Configuration.db(), date.year, date.month, date.day, ip)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 19
0
def expanded_search(date, uri):
    """Return the day's ``proxy`` rows whose full URI or referer equals ``uri``.

    The month and day are zero-padded to two digits and all three partition
    values are compared as quoted strings; results are ordered by
    ``p_time``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value matched against ``fulluri`` and ``referer``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    db = Configuration.db()
    month = str(date.month).zfill(2)
    day = str(date.day).zfill(2)

    # The trailing backslashes inside the literal join the three column
    # lines into a single line of SQL text.
    template = """
			SELECT p_date, p_time, clientip, username, duration, fulluri,\
			    webcat, respcode, reqmethod,useragent, resconttype,\
			    referer, uriport, serverip, scbytes, csbytes
			FROM {0}.proxy
			WHERE y='{1}' AND m='{2}' AND d='{3}'
			AND (fulluri='{4}' OR referer ='{4}')
			ORDER BY p_time
			"""
    sql = template.format(db, date.year, month, day, uri)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 20
0
def time_line(ip, date):
    """Return the ``flow_timeline`` rows recorded for a threat IP on a day.

    Args:
        ip: Value matched against the ``ip_threat`` column.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
        SELECT
            ip_threat,tstart,tend,srcip,dstip,proto,
		    sport,dport,ipkt,ibyt
        FROM {0}.flow_timeline
        WHERE
            y={1} AND m={2} AND d={3}
            AND ip_threat = '{4}'
        """
    sql = template.format(
        Configuration.db(), date.year, date.month, date.day, ip)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 21
0
def get_scored_connections(date):
    """Return the flow connections already scored for the given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list`` for the
        ``flow_threat_investigation`` partition.
    """
    template = """
            SELECT
                tstart,srcip,dstip,srcport,dstport,score
            FROM
                {0}.flow_threat_investigation
            WHERE
                y={1} AND m={2} AND d={3}
            """
    sql = template.format(Configuration.db(), date.year, date.month, date.day)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 22
0
def expanded_search(date, uri):
    """Look up the day's raw ``proxy`` records matching a URI.

    A row matches when either ``fulluri`` or ``referer`` equals ``uri``.
    Partition values are passed as quoted strings, with month and day
    zero-padded to two digits. Rows are ordered by ``p_time``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value matched against ``fulluri`` and ``referer``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    padded_month = str(date.month).zfill(2)
    padded_day = str(date.day).zfill(2)
    params = (database, date.year, padded_month, padded_day, uri)

    # Backslash-newline pairs inside the literal are line continuations, so
    # the column list ends up on one line of SQL text.
    proxy_sql = """
			SELECT p_date, p_time, clientip, username, duration, fulluri,\
			    webcat, respcode, reqmethod,useragent, resconttype,\
			    referer, uriport, serverip, scbytes, csbytes
			FROM {0}.proxy
			WHERE y='{1}' AND m='{2}' AND d='{3}'
			AND (fulluri='{4}' OR referer ='{4}')
			ORDER BY p_time
			""".format(*params)
    return ImpalaEngine.execute_query_as_list(proxy_sql)
Ejemplo n.º 23
0
def client_details(date, ip):
    """Query ``dns_dendro`` for one client IP within a day partition.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Value compared with ``ip_dst``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.

    NOTE(review): the select list names ``ip_dst`` twice.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, ip)
    client_sql = """
            SELECT
                ip_dst,dns_a,dns_qry_name,ip_dst
            FROM
                {0}.dns_dendro
            WHERE
                y={1} AND m={2} AND d={3}
                AND ip_dst='{4}'
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(client_sql)
Ejemplo n.º 24
0
def get_scored_connections(date):
    """Read the day's partition of ``flow_threat_investigation``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    scored_sql = """
            SELECT
                tstart,srcip,dstip,srcport,dstport,score
            FROM
                {0}.flow_threat_investigation
            WHERE
                y={1} AND m={2} AND d={3}
            """.format(database, date.year, date.month, date.day)

    return ImpalaEngine.execute_query_as_list(scored_sql)
Ejemplo n.º 25
0
def time_line(ip, date):
    """Query ``flow_timeline`` for one threat IP within a day partition.

    Args:
        ip: Value compared with ``ip_threat``.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, ip)
    timeline_sql = """
        SELECT
            ip_threat,tstart,tend,srcip,dstip,proto,
		    sport,dport,ipkt,ibyt
        FROM {0}.flow_timeline
        WHERE
            y={1} AND m={2} AND d={3}
            AND ip_threat = '{4}'
        """.format(*params)

    return ImpalaEngine.execute_query_as_list(timeline_sql)
Ejemplo n.º 26
0
def ingest_summary(start_date,end_date):
    """Return proxy ingest totals between two dates, both ends inclusive.

    The ``y``/``m``/``d`` partition columns are compared as an ordered
    triple. Filtering each column against its own range would drop valid
    days whenever the range crosses a month or year boundary (for example
    Jan 25 to Feb 5 would require ``d >= 25 AND d <= 5``).

    Args:
        start_date: First day of the range; exposes ``year``, ``month``
            and ``day``.
        end_date: Last day of the range; same attributes.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    db = Configuration.db()
    # First predicate: cheap year bound (implied by the other two).
    # Second: (y, m, d) >= start. Third: (y, m, d) <= end.
    is_query = ("""
                SELECT
                    tdate,total
                FROM {0}.proxy_ingest_summary
                WHERE
                    ( y >= {1} and y <= {4}) AND
                    ( y > {1} or ( y = {1} and ( m > {2} or ( m = {2} and d >= {3})))) AND
                    ( y < {4} or ( y = {4} and ( m < {5} or ( m = {5} and d <= {6}))))
                """)\
                .format(db, start_date.year, start_date.month, start_date.day,
                        end_date.year, end_date.month, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Ejemplo n.º 27
0
def ingest_summary(start_date, end_date):
    """Return proxy ingest totals between two dates, both ends inclusive.

    The ``y``/``m``/``d`` partition columns are compared as an ordered
    triple. Filtering each column against its own range would drop valid
    days whenever the range crosses a month or year boundary (for example
    Jan 25 to Feb 5 would require ``d >= 25 AND d <= 5``).

    Args:
        start_date: First day of the range; exposes ``year``, ``month``
            and ``day``.
        end_date: Last day of the range; same attributes.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    db = Configuration.db()
    # First predicate: cheap year bound (implied by the other two).
    # Second: (y, m, d) >= start. Third: (y, m, d) <= end.
    is_query = ("""
                SELECT
                    tdate,total
                FROM {0}.proxy_ingest_summary
                WHERE
                    ( y >= {1} and y <= {4}) AND
                    ( y > {1} or ( y = {1} and ( m > {2} or ( m = {2} and d >= {3})))) AND
                    ( y < {4} or ( y = {4} and ( m < {5} or ( m = {5} and d <= {6}))))
                """)\
                .format(db, start_date.year, start_date.month, start_date.day,
                        end_date.year, end_date.month, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Ejemplo n.º 28
0
def details(frame_time, query):
    """Return the ``dns_edge`` rows for a query name within one hour.

    Args:
        frame_time: Object exposing ``year``, ``month``, ``day`` and
            ``hour``; selects the ``y``/``m``/``d``/``hh`` partition.
        query: Value matched against ``dns_qry_name``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
		unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,
		dns_qry_type,dns_qry_rcode,dns_a,dns_qry_type_name,
		dns_qry_rcode_name,dns_qry_class_name
            FROM
                {0}.dns_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND dns_qry_name = '{5}'
            """
    sql = template.format(
        Configuration.db(),
        frame_time.year, frame_time.month, frame_time.day, frame_time.hour,
        query)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 29
0
def details(frame_time, query):
    """Query ``dns_edge`` for one DNS query name within an hour partition.

    Args:
        frame_time: Object exposing ``year``, ``month``, ``day`` and
            ``hour``.
        query: Value compared with ``dns_qry_name``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, frame_time.year, frame_time.month, frame_time.day,
              frame_time.hour, query)
    edge_sql = """
            SELECT
		unix_tstamp,frame_len,ip_dst,ip_src,dns_qry_name,dns_qry_class,
		dns_qry_type,dns_qry_rcode,dns_a,dns_qry_type_name,
		dns_qry_rcode_name,dns_qry_class_name
            FROM
                {0}.dns_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND dns_qry_name = '{5}'
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(edge_sql)
Ejemplo n.º 30
0
def story_board(date):
    """Return the proxy storyboard entries saved for the given day.

    Each row's ``"text"`` value has its newline characters replaced by the
    two-character sequence ``\\n`` before the rows are returned.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The (mutated) rows from ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
                p_threat,title,text
            FROM
                {0}.proxy_storyboard
            WHERE
                y={1} AND m={2} AND d={3}
            """
    sql = template.format(Configuration.db(), date.year, date.month, date.day)

    entries = ImpalaEngine.execute_query_as_list(sql)
    for entry in entries:
        escaped = entry["text"].replace("\n", "\\n")
        entry["text"] = escaped
    return entries
Ejemplo n.º 31
0
def story_board(date):
    """Return the flow storyboard entries saved for the given day.

    Each row's ``"text"`` value has its newline characters replaced by the
    two-character sequence ``\\n`` before the rows are returned.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        The (mutated) rows from ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
                ip_threat,title,text
            FROM
                {0}.flow_storyboard
            WHERE
                y={1} AND m={2} AND d={3}
            """
    sql = template.format(Configuration.db(), date.year, date.month, date.day)

    entries = ImpalaEngine.execute_query_as_list(sql)
    for entry in entries:
        escaped = entry["text"].replace("\n", "\\n")
        entry["text"] = escaped
    return entries
Ejemplo n.º 32
0
def ingest_summary(start_date, end_date):
    """Return flow ingest totals for a date range, ordered by ``tdate``.

    The WHERE clause is produced by ``daterange_query(start_date,
    end_date)`` and inserted verbatim into the statement.

    Args:
        start_date: Start of the range, forwarded to ``daterange_query``.
        end_date: End of the range, forwarded to ``daterange_query``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    database = Configuration.db()
    where_clause = daterange_query(start_date, end_date)

    template = """
            SELECT
                tdate,total
            FROM {0}.flow_ingest_summary
            WHERE
                {1}
            ORDER BY tdate
            """
    return ImpalaEngine.execute_query_as_list(
        template.format(database, where_clause))
Ejemplo n.º 33
0
def save_comments(anchor, ip, query, title, text, date):
    """Create or update a DNS storyboard comment for a given day.

    The day's ``dns_storyboard`` rows are read, the rows whose
    ``ip_threat`` or ``dns_threat`` equals ``anchor`` get the new title and
    text (or a new row is appended when none matches), the partition folder
    is deleted and every row is inserted again.

    Args:
        anchor: Value compared with ``ip_threat`` and ``dns_threat`` to find
            existing comments.
        ip: Stored (as ``str``) in ``ip_threat`` when a new row is added.
        query: Stored in ``dns_threat`` when a new row is added.
        title: Comment title.
        text: Comment body.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Always ``True``.

    NOTE(review): values are interpolated into the INSERT unescaped.
    """
    db = Configuration.db()
    select_sql = """
            SELECT
                ip_threat,dns_threat,title,text
            FROM
                {0}.dns_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """.format(db, date.year, date.month, date.day)
    comments = ImpalaEngine.execute_query_as_list(select_sql)

    # Overwrite every existing comment attached to this anchor.
    updated_existing = False
    for comment in comments:
        if comment["ip_threat"] == anchor or comment["dns_threat"] == anchor:
            comment["title"] = title
            comment["text"] = text
            updated_existing = True

    # No match: register a brand new comment.
    if not updated_existing:
        new_comment = {
            'text': text,
            'ip_threat': str(ip),
            'title': title,
            'dns_threat': query
        }
        comments.append(new_comment)

    # Drop the partition folder so the rows can be written again.
    partition_path = "{0}/dns/hive/oa/storyboard/y={1}/m={2}/d={3}/".format(
        Configuration.spot(), date.year, date.month, date.day)
    HDFSClient.delete_folder(partition_path, "impala")
    ImpalaEngine.execute_query("invalidate metadata")

    insert_template = """
         	INSERT INTO {0}.dns_storyboard PARTITION(y={1} , m={2} ,d={3})
            	VALUES ( '{4}', '{5}', '{6}','{7}')
            	"""
    for comment in comments:
        statement = insert_template.format(
            db, date.year, date.month, date.day,
            comment["ip_threat"], comment["dns_threat"],
            comment["title"], comment["text"])
        ImpalaEngine.execute_query(statement)

    return True
Ejemplo n.º 34
0
def details(src_ip, dst_ip, date):
    """Return the ``flow_edge`` rows between two IPs for a given minute.

    Rows are matched in either direction (source/destination swapped) and
    ordered by ``tstart``.

    Args:
        src_ip: First endpoint.
        dst_ip: Second endpoint.
        date: Object exposing ``year``, ``month``, ``day``, ``hour`` and
            ``minute``; selects the ``y``/``m``/``d``/``hh``/``mn`` partition.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
            SELECT
                tstart,srcip,dstip,sport,dport,proto,flags,
                tos,ibyt,ipkt,input,output,rip,obyt,opkt
            FROM {0}.flow_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND mn={5}
                AND ((srcip='{6}' AND dstip='{7}')
                OR  (srcip='{7}' AND dstip='{6}'))
            ORDER BY tstart
            """
    sql = template.format(
        Configuration.db(),
        date.year, date.month, date.day, date.hour, date.minute,
        src_ip, dst_ip)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 35
0
def details(src_ip, dst_ip, date):
    """Query ``flow_edge`` for traffic between two IPs in a minute partition.

    Both directions are matched and rows are ordered by ``tstart``.

    Args:
        src_ip: First endpoint.
        dst_ip: Second endpoint.
        date: Object exposing ``year``, ``month``, ``day``, ``hour`` and
            ``minute``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, date.hour,
              date.minute, src_ip, dst_ip)
    edge_sql = """
            SELECT
                tstart,srcip,dstip,sport,dport,proto,flags,
                tos,ibyt,ipkt,input,output,rip,obyt,opkt
            FROM {0}.flow_edge
            WHERE
                y={1} AND m={2} AND d={3} AND hh={4} AND mn={5}
                AND ((srcip='{6}' AND dstip='{7}')
                OR  (srcip='{7}' AND dstip='{6}'))
            ORDER BY tstart
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(edge_sql)
Ejemplo n.º 36
0
def ingest_summary(start_date,end_date):
    """Return flow ingest totals between two dates, both ends inclusive.

    The ``y``/``m``/``d`` partition columns are compared as an ordered
    triple. Filtering each column against its own range would drop valid
    days whenever the range crosses a month or year boundary (for example
    Jan 25 to Feb 5 would require ``d >= 25 AND d <= 5``).

    Args:
        start_date: First day of the range; exposes ``year``, ``month``
            and ``day``.
        end_date: Last day of the range; same attributes.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``, ordered by
        ``tdate``.
    """
    db = Configuration.db()
    # First predicate: cheap year bound (implied by the other two).
    # Second: (y, m, d) >= start. Third: (y, m, d) <= end.
    is_query = ("""
            SELECT
                tdate,total
            FROM {0}.flow_ingest_summary
            WHERE
                ( y >= {1} AND y <= {4}) AND
                ( y > {1} OR ( y = {1} AND ( m > {2} OR ( m = {2} AND d >= {3})))) AND
                ( y < {4} OR ( y = {4} AND ( m < {5} OR ( m = {5} AND d <= {6}))))
            ORDER BY tdate
            """).format(db, start_date.year, start_date.month, start_date.day,
                        end_date.year, end_date.month, end_date.day)

    return ImpalaEngine.execute_query_as_list(is_query)
Ejemplo n.º 37
0
def save_comment(ip,title,text,date):
    """Create or update the flow storyboard comment of an IP for a given day.

    The day's ``flow_storyboard`` rows are read, the rows whose
    ``ip_threat`` equals ``ip`` get the new title and text (or a new row is
    appended when none matches), the partition folder is deleted and every
    row is inserted again.

    Args:
        ip: Threat IP the comment belongs to.
        title: Comment title.
        text: Comment body.
        date: Object exposing ``year``, ``month`` and ``day``.

    Returns:
        Always ``True``.

    NOTE(review): values are interpolated into the INSERT unescaped.
    """
    # Get current table info.
    db = Configuration.db()
    sb_query = ("""
            SELECT
                ip_threat,title,text
            FROM
                {0}.flow_storyboard
            WHERE
                y = {1} AND m= {2} AND d={3}
            """).format(db,date.year,date.month,date.day)

    sb_data = ImpalaEngine.execute_query_as_list(sb_query)

    # Find value if it already exists.
    saved = False
    for item in sb_data:
        if item["ip_threat"] == ip:
            item["title"] = title
            item["text"] = text
            saved = True

    if not saved:
        sb_data.append({'text': text, 'ip_threat': str(ip), 'title': title})

    # Remove old file.
    app_path = Configuration.spot()
    old_file = "{0}/flow/hive/oa/storyboard/y={1}/m={2}/d={3}/" \
    .format(app_path,date.year,date.month,date.day)

    # Remove file manually to allow the comments update.
    HDFSClient.delete_folder(old_file,"impala")
    ImpalaEngine.execute_query("invalidate metadata")

    # The loop body is indented with spaces only; mixing a tab-indented line
    # with space-indented ones raises TabError on Python 3.
    for item in sb_data:
        insert_query = ("""
         	INSERT INTO {0}.flow_storyboard PARTITION(y={1} , m={2} ,d={3})
            	VALUES ( '{4}', '{5}','{6}')
            	""") \
                .format(db,date.year,date.month,date.day, \
                item["ip_threat"],item["title"],item["text"])

        ImpalaEngine.execute_query(insert_query)
    return True
Ejemplo n.º 38
0
def details(date, uri, ip):
    """Return the ``proxy_edge`` rows of a URI/client pair for a given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value matched against ``fulluri``. ``None`` is treated as an
            empty string.
        ip: Value matched against ``clientIp``.

    Returns:
        ``None`` when both ``uri`` and ``ip`` are falsy, otherwise the
        result of ``ImpalaEngine.execute_query_as_list``.
    """
    if not uri and not ip:
        return None

    # Escape the URI for use inside a single-quoted SQL string literal:
    # backslashes first, then quotes. Replacing "'" with "//'" leaves the
    # quote unescaped and breaks the statement.
    # `uri or ""` avoids an AttributeError when only `ip` was provided.
    escaped_uri = (uri or "").replace("\\", "\\\\").replace("'", "\\'")

    db = Configuration.db()
    p_details = ("""
		SELECT
		    tdate,time,clientIp,host,webcat,respcode,respcode_name
		    ,reqmethod,useragent,resconttype,referer,uriport,serverip
		    ,scbytes,csbytes,fulluri,hh
		FROM
		    {0}.proxy_edge
		WHERE
		    y={1} AND m={2} AND d={3} AND
            (fulluri='{4}' AND clientIp='{5}')
		""").format(db, date.year, date.month, date.day, escaped_uri, ip)
    return ImpalaEngine.execute_query_as_list(p_details)
Ejemplo n.º 39
0
def details(date,uri,ip):
    """Return the ``proxy_edge`` rows of a URI/client pair for a given day.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        uri: Value matched against ``fulluri``. ``None`` is treated as an
            empty string.
        ip: Value matched against ``clientIp``.

    Returns:
        ``None`` when both ``uri`` and ``ip`` are falsy, otherwise the
        result of ``ImpalaEngine.execute_query_as_list``.
    """
    if not uri and not ip:
        return None

    # Escape the URI for use inside a single-quoted SQL string literal:
    # backslashes first, then quotes. Replacing "'" with "//'" leaves the
    # quote unescaped and breaks the statement.
    # `uri or ""` avoids an AttributeError when only `ip` was provided.
    escaped_uri = (uri or "").replace("\\", "\\\\").replace("'", "\\'")

    db = Configuration.db()
    p_details = ("""
		SELECT
		    tdate,time,clientIp,host,webcat,respcode,respcode_name
		    ,reqmethod,useragent,resconttype,referer,uriport,serverip
		    ,scbytes,csbytes,fulluri,hh
		FROM
		    {0}.proxy_edge
		WHERE
		    y={1} AND m={2} AND d={3} AND
            (fulluri='{4}' AND clientIp='{5}')
		""").format(db,date.year,date.month,date.day,escaped_uri,ip)
    return ImpalaEngine.execute_query_as_list(p_details)
Ejemplo n.º 40
0
def incident_progression(date, query, ip):
    """Return the ``dns_threat_dendro`` rows stored for an anchor on a day.

    The anchor is ``query`` when it is truthy, otherwise ``ip``. The third
    selected column is ``dns_qry_name`` when ``ip`` is truthy and ``ip_dst``
    otherwise.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        query: DNS query name; preferred anchor value.
        ip: Client IP; fallback anchor value.

    Returns:
        ``None`` when both ``ip`` and ``query`` are falsy, otherwise the
        result of ``ImpalaEngine.execute_query_as_list``.
    """
    if not (ip or query):
        return None

    db = Configuration.db()
    if ip:
        extra_column = "dns_qry_name"
    else:
        extra_column = "ip_dst"

    template = """
            SELECT
                anchor,total,{0}
            FROM
                {1}.dns_threat_dendro
            WHERE
                y={2} AND m={3} AND d={4}
                AND anchor = '{5}'
            """
    sql = template.format(
        extra_column, db, date.year, date.month, date.day, query or ip)

    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 41
0
def expanded_search(date, ip):
    """Summarise the day's flows in which ``ip`` is source or destination.

    Rows of ``flow`` are grouped by ``sip``, ``dip``, ``sport`` and
    ``dport``; each group reports first/last seen times, the connection
    count and max/avg packet and byte figures.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Value matched against ``sip`` and ``dip``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.
    """
    template = """
		SELECT
		    min(treceived) as firstseen, max(treceived) as lastseen,
            sip as srcip, dip as dstip, sport as sport,
            dport as dport, count(sip) as conns, max(ipkt) as maxpkts,
		    avg(ipkt) as avgpkts, max(ibyt) as maxbyts, avg(ibyt) as avgbyts
	    FROM
		    {0}.flow
        WHERE
	        y={1} AND m={2} AND d={3}
        AND (sip ='{4}'  OR dip='{4}')
        GROUP BY
		    sip, dip,sport,dport
		"""
    sql = template.format(
        Configuration.db(), date.year, date.month, date.day, ip)
    return ImpalaEngine.execute_query_as_list(sql)
Ejemplo n.º 42
0
def expanded_search(date, ip):
    """Aggregate the day's ``flow`` records that involve ``ip``.

    A record matches when ``ip`` equals ``sip`` or ``dip``. Results are
    grouped by ``sip``, ``dip``, ``sport`` and ``dport``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Value compared with ``sip`` and ``dip``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    database = Configuration.db()
    params = (database, date.year, date.month, date.day, ip)
    flows_sql = """
		SELECT
		    min(treceived) as firstseen, max(treceived) as lastseen,
            sip as srcip, dip as dstip, sport as sport,
            dport as dport, count(sip) as conns, max(ipkt) as maxpkts,
		    avg(ipkt) as avgpkts, max(ibyt) as maxbyts, avg(ibyt) as avgbyts
	    FROM
		    {0}.flow
        WHERE
	        y={1} AND m={2} AND d={3}
        AND (sip ='{4}'  OR dip='{4}')
        GROUP BY
		    sip, dip,sport,dport
		""".format(*params)

    return ImpalaEngine.execute_query_as_list(flows_sql)
Ejemplo n.º 43
0
def incident_progression(date, query, ip):
    """Query ``dns_threat_dendro`` for one anchor within a day partition.

    ``query`` is used as the anchor when truthy, ``ip`` otherwise. The extra
    selected column is ``dns_qry_name`` when ``ip`` is truthy, else
    ``ip_dst``.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        query: DNS query name; preferred anchor value.
        ip: Client IP; fallback anchor value.

    Returns:
        ``None`` when neither ``ip`` nor ``query`` is given, otherwise
        whatever ``ImpalaEngine.execute_query_as_list`` returns.
    """
    if not (ip or query):
        return None

    database = Configuration.db()
    selected = "dns_qry_name" if ip else "ip_dst"
    anchor_value = query or ip
    params = (selected, database, date.year, date.month, date.day,
              anchor_value)

    dendro_sql = """
            SELECT
                anchor,total,{0}
            FROM
                {1}.dns_threat_dendro
            WHERE
                y={2} AND m={3} AND d={4}
                AND anchor = '{5}'
            """.format(*params)

    return ImpalaEngine.execute_query_as_list(dendro_sql)
Ejemplo n.º 44
0
def suspicious_connections(date, ip=None, limit=250):
    """Return scored flows for a day that have no investigation entry.

    ``flow_scores`` is left-joined to ``flow_threat_investigation`` on a
    matching source or destination IP; only unmatched rows are kept.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Optional IP that must equal ``fs.srcip`` or ``fs.dstip``.
        limit: Row limit, applied after ordering by ``rank``.

    Returns:
        The result of ``ImpalaEngine.execute_query_as_list``.

    NOTE(review): values are interpolated into the SQL text unescaped.
    """
    base_sql = """
                SELECT STRAIGHT_JOIN
                    fs.tstart,fs.srcip,fs.dstip,fs.sport,fs.dport,proto,
                    ipkt,ibyt,opkt,obyt,ml_score,rank,srcip_internal,
                    dstip_internal,src_geoloc,dst_geoloc,src_domain,
                    dst_domain,src_rep,dst_rep
                FROM {0}.flow_scores fs
                LEFT JOIN {0}.flow_threat_investigation ft
                    ON (( fs.srcip = ft.srcip) OR ( fs.dstip = ft.dstip))
                WHERE fs.y={1} AND fs.m={2} and fs.d={3}
                    AND ( ft.srcip is NULL AND ft.dstip is NULL )
                """.format(Configuration.db(), date.year, date.month, date.day)

    ip_clause = (" AND ( fs.srcip='{0}' OR fs.dstip='{0}')".format(ip)
                 if ip else "")
    tail = " ORDER BY rank  limit {0}".format(limit)

    return ImpalaEngine.execute_query_as_list(base_sql + ip_clause + tail)
Ejemplo n.º 45
0
def suspicious_connections(date, ip=None, limit=250):
    """Fetch the day's scored flows that were not investigated yet.

    Rows of ``flow_scores`` are kept only when the left join against
    ``flow_threat_investigation`` (same source or same destination IP)
    finds no match.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Optional IP that must equal ``fs.srcip`` or ``fs.dstip``.
        limit: Row limit, applied after ordering by ``rank``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.

    NOTE(review): values are interpolated into the SQL text unescaped.
    """
    database = Configuration.db()
    pieces = ["""
                SELECT STRAIGHT_JOIN
                    fs.tstart,fs.srcip,fs.dstip,fs.sport,fs.dport,proto,
                    ipkt,ibyt,opkt,obyt,ml_score,rank,srcip_internal,
                    dstip_internal,src_geoloc,dst_geoloc,src_domain,
                    dst_domain,src_rep,dst_rep
                FROM {0}.flow_scores fs
                LEFT JOIN {0}.flow_threat_investigation ft
                    ON (( fs.srcip = ft.srcip) OR ( fs.dstip = ft.dstip))
                WHERE fs.y={1} AND fs.m={2} and fs.d={3}
                    AND ( ft.srcip is NULL AND ft.dstip is NULL )
                """.format(database, date.year, date.month, date.day)]

    if ip:
        pieces.append(" AND ( fs.srcip='{0}' OR fs.dstip='{0}')".format(ip))
    pieces.append(" ORDER BY rank  limit {0}".format(limit))

    return ImpalaEngine.execute_query_as_list("".join(pieces))
Ejemplo n.º 46
0
def suspicious_queries(date, ip=None, query=None, limit=250):
    """Fetch the day's scored DNS queries that were not investigated yet.

    ``dns_scores`` is left-joined to ``dns_threat_investigation`` on
    ``dns_qry_name`` and only rows without a match are returned.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Optional exact match on ``ds.ip_dst``.
        query: Optional substring matched against ``ds.dns_qry_name``.
        limit: Row limit, applied after ordering by ``ds.ml_score``.

    Returns:
        Whatever ``ImpalaEngine.execute_query_as_list`` returns.

    NOTE(review): values are interpolated into the SQL text unescaped.
    """
    database = Configuration.db()
    select_part = """
            SELECT STRAIGHT_JOIN
                ds.unix_tstamp,frame_len,ds.ip_dst,ds.dns_qry_name,
                dns_qry_class,dns_qry_type,dns_qry_rcode,ml_score,tld,
                query_rep,hh,dns_qry_class_name,dns_qry_type_name,
                dns_qry_rcode_name,network_context
            FROM {0}.dns_scores ds
            LEFT JOIN {0}.dns_threat_investigation dt
                ON  (ds.dns_qry_name = dt.dns_qry_name)
            WHERE
                ds.y={1} AND ds.m={2} AND ds.d={3}
                AND (dt.dns_qry_name is NULL)
            """.format(database, date.year, date.month, date.day)

    ip_clause = " AND ds.ip_dst = '{0}'".format(ip) if ip else ""
    name_clause = (" AND ds.dns_qry_name LIKE '%{0}%'".format(query)
                   if query else "")
    tail = " ORDER BY ds.ml_score limit {0}".format(limit)

    return ImpalaEngine.execute_query_as_list(
        select_part + ip_clause + name_clause + tail)
Ejemplo n.º 47
0
def create_connection():
    """Open a connection using the project configuration and return a cursor.

    Host, port and database come from ``config``. When Kerberos is enabled
    the ``GSSAPI`` auth mechanism is requested; when SSL is enabled the
    ``ca_cert`` and ``use_ssl`` options are filled from ``config.ssl()``.

    Returns:
        The object returned by ``cursor()`` on the new connection.
    """
    impala_host, impala_port = config.impala()
    options = {}

    # TODO: if using hive, kerberos service name must be changed, impyla sets 'impala' as default
    # NOTE(review): service_name is built but never passed to connect().
    service_name = {'kerberos_service_name': 'impala'}

    if config.kerberos_enabled():
        # Only the auth mechanism is used; the unpacked values are not.
        principal, keytab, sasl_mech, security_proto = config.kerberos()
        options['auth_mechanism'] = 'GSSAPI'

    if config.ssl_enabled():
        ssl_verify, ca_location, cert, key = config.ssl()
        options['ca_cert'] = cert
        options['use_ssl'] = ssl_verify

    database = config.db()
    connection = connect(
        host=impala_host,
        port=int(impala_port),
        database=database,
        **options)
    return connection.cursor()
Ejemplo n.º 48
0
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    """Score the flow connections of one day that match the given filters.

    Matching rows are read from ``flow_scores``, written together with
    ``score`` into ``flow_threat_investigation`` and added to the day's
    ``ml_feedback.csv`` file.

    Args:
        score: Value stored with every matching connection.
        date: Object exposing ``year``, ``month`` and ``day``.
        src_ip: Optional source IP filter.
        dst_ip: Optional destination IP filter.
        src_port: Optional source port filter.
        dst_port: Optional destination port filter.

    Returns:
        False when no filter at all was supplied, True otherwise.
    """
    if not (src_ip or dst_ip or src_port or dst_port):
        return False

    database = Configuration.db()

    # Look up the connections to score.
    select_sql = ("""
            SELECT
                tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE
                y = {1} AND m={2} AND d={3}
            """).format(database, date.year, date.month, date.day)
    if src_ip:
        select_sql += " AND srcip = '{0}'".format(src_ip)
    if dst_ip:
        select_sql += " AND dstip = '{0}'".format(dst_ip)
    if src_port:
        select_sql += " AND sport = {0}".format(str(src_port))
    if dst_port:
        select_sql += " AND dport = {0}".format(str(dst_port))
    matches = ImpalaEngine.execute_query(select_sql)

    # Each match becomes one tuple in the INSERT and one feedback row.
    insert_head = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""").format(database, date.year, date.month, date.day)

    value_groups = []
    feedback_rows = []
    for record in matches:
        scored = tuple(record[i] for i in range(5)) + (score,)
        value_groups.append(str(scored))
        feedback_rows.append([score] + [record[i] for i in range(7)])

    if value_groups:
        ImpalaEngine.execute_query(
            insert_head + ",".join(value_groups) + ")")

    # Build the feedback file location for the day.
    spot_home = Configuration.spot()
    feedback_dir = "{0}/flow/scored_results/{1}{2}{3}/feedback".format(
        spot_home,
        date.year,
        str(date.month).zfill(2),
        str(date.day).zfill(2))

    # An empty listing means the file is new: write a header and do not append.
    # NOTE(review): the header names ipkt before ibyt, while the rows carry
    # ibyt (column 5) before ipkt (column 6) -- confirm which order the
    # consumer of ml_feedback.csv expects.
    starts_new_file = len(HDFSClient.list_dir(feedback_dir)) == 0
    if starts_new_file:
        feedback_rows.insert(
            0, ["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])

    HDFSClient.put_file_csv(feedback_rows, feedback_dir, "ml_feedback.csv",
                            append_file=not starts_new_file)
    return True
Ejemplo n.º 49
0
def score_connection(score,date,src_ip=None,dst_ip=None,src_port=None,dst_port=None):
    """Attach ``score`` to the day's flow connections matching the filters.

    The matching rows of ``flow_scores`` are inserted, with the score, into
    ``flow_threat_investigation`` and also written to the ``ml_feedback.csv``
    file of that day.

    Args:
        score: Value stored with every matching connection.
        date: Object exposing ``year``, ``month`` and ``day``.
        src_ip: Optional source IP filter.
        dst_ip: Optional destination IP filter.
        src_port: Optional source port filter.
        dst_port: Optional destination port filter.

    Returns:
        False when none of the four filters is set, True otherwise.
    """
    filters_given = src_ip or dst_ip or src_port or dst_port
    if not filters_given:
        return False

    schema = Configuration.db()

    # Statement that selects the connections to score.
    lookup = ("""
            SELECT
                tstart,srcip,dstip,sport,dport, ibyt,ipkt
            FROM {0}.flow_scores
            WHERE
                y = {1} AND m={2} AND d={3}
            """).format(schema, date.year, date.month, date.day)

    conditions = []
    if src_ip:
        conditions.append(" AND srcip = '{0}'".format(src_ip))
    if dst_ip:
        conditions.append(" AND dstip = '{0}'".format(dst_ip))
    if src_port:
        conditions.append(" AND sport = {0}".format(str(src_port)))
    if dst_port:
        conditions.append(" AND dport = {0}".format(str(dst_port)))
    found = ImpalaEngine.execute_query(lookup + "".join(conditions))

    # Statement that stores the scored connections.
    insert_sql = ("""
        INSERT INTO {0}.flow_threat_investigation
        PARTITION (y={1},m={2},d={3})
        VALUES (""").format(schema, date.year, date.month, date.day)

    csv_rows = []
    inserted = 0
    for conn in found:
        if inserted:
            insert_sql += ","
        insert_sql += str((conn[0], conn[1], conn[2], conn[3], conn[4], score))
        csv_rows.append([score, conn[0], conn[1], conn[2], conn[3], conn[4],
                         conn[5], conn[6]])
        inserted += 1
    insert_sql += ")"

    if inserted > 0:
        ImpalaEngine.execute_query(insert_sql)

    # Feedback file for the day.
    month = str(date.month).zfill(2)
    day = str(date.day).zfill(2)
    feedback_path = "{0}/flow/scored_results/{1}{2}{3}/feedback".format(
        Configuration.spot(), date.year, month, day)

    # NOTE(review): the header names ipkt before ibyt, while the rows carry
    # ibyt (column 5) before ipkt (column 6) -- confirm which order the
    # consumer of ml_feedback.csv expects.
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # Nothing there yet: start the file with a header row.
        csv_rows.insert(
            0, ["sev","tstart","sip","dip","sport","dport","ipkt","ibyt"])
        append = False
    else:
        append = True

    HDFSClient.put_file_csv(csv_rows, feedback_path, "ml_feedback.csv",
                            append_file=append)
    return True
Ejemplo n.º 50
0
def create_timeline(anchor, clientips, date, top_results):
    """Rebuild the proxy timeline partition of ``date`` for ``anchor``.

    The timeline rows of the day that belong to other threats are read
    first, the partition folder is deleted from HDFS, those rows are
    inserted again, and finally the rows for ``anchor`` are generated from
    the ``proxy`` table for the highest-ranked client IPs.

    Args:
        anchor: Full URI identifying the threat. An empty value skips all
            work.
        clientips: Mapping of client IP to a sortable weight; the
            ``top_results`` entries with the largest weight are used. May be
            empty or None.
        date: Object exposing ``year``, ``month`` and ``day``.
        top_results: Number of client IPs to keep, also used as the
            ``LIMIT`` of the generated insert.

    Returns:
        str: "Timeline successfully saved" after all statements were issued,
        or "Timeline couldn't be created" when ``anchor`` is empty.
    """
    if not anchor:
        return "Timeline couldn't be created"

    susp_ips = []
    if clientips:
        ranked = sorted(clientips.items(), key=lambda kv: kv[1], reverse=True)
        susp_ips = [client for client, _ in ranked[:top_results]]

    db = Configuration.db()

    # Escape backslashes first and then single quotes so the anchor stays a
    # single SQL string literal (the former "//'" replacement left the quote
    # unescaped and altered the value).
    sql_anchor = anchor.replace("\\", "\\\\").replace("'", "\\'")

    time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db, date.year, date.month, date.day, sql_anchor)

    # Rows of other threats must be read before the partition is deleted.
    tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

    ip_list = "'" + "','".join(susp_ips) + "'"
    imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """).format(db, sql_anchor, date.year,
                                str(date.month).zfill(2),
                                str(date.day).zfill(2),
                                ip_list, top_results, sql_anchor)

    app_path = Configuration.spot()
    old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}".format(
        app_path, date.year, date.month, date.day)

    HDFSClient.delete_folder(old_file, "impala")
    ImpalaEngine.execute_query("invalidate metadata")

    # Put back the rows that belonged to other threats.
    for item in tmp_timeline_data:
        insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """).format(db, date.year, date.month, date.day,
                                    item["p_threat"], item["tstart"],
                                    item["tend"], item["duration"],
                                    item["clientip"], item["respcode"],
                                    item["respcodename"])
        ImpalaEngine.execute_query(insert_query)

    ImpalaEngine.execute_query(imp_query)
    return "Timeline successfully saved"
Ejemplo n.º 51
0
def create_timeline(anchor,clientips,date,top_results):
    """Rebuild the proxy timeline partition of ``date`` for ``anchor``.

    Timeline rows of the day that belong to other threats are saved, the
    partition folder is removed from HDFS, the saved rows are inserted
    again, and the rows for ``anchor`` are then generated from the ``proxy``
    table for the highest-ranked client IPs.

    Args:
        anchor: Full URI identifying the threat. An empty value skips all
            work.
        clientips: Mapping of client IP to a sortable weight; the
            ``top_results`` entries with the largest weight are used. May be
            empty or None.
        date: Object exposing ``year``, ``month`` and ``day``.
        top_results: Number of client IPs to keep, also used as the
            ``LIMIT`` of the generated insert.

    Returns:
        str: "Timeline successfully saved" after all statements were issued,
        or "Timeline couldn't be created" when ``anchor`` is empty.
    """
    if not anchor:
        return "Timeline couldn't be created"

    susp_ips = []
    if clientips:
        srtlist = sorted(clientips.items(), key=lambda x: x[1], reverse=True)
        susp_ips = [val[0] for val in srtlist[:top_results]]

    db = Configuration.db()

    # Backslashes are escaped before single quotes so the anchor remains one
    # SQL string literal; the previous "//'" replacement did not escape the
    # quote and changed the value being compared.
    escaped_anchor = anchor.replace("\\", "\\\\").replace("'", "\\'")

    time_line_query = ("""
                SELECT p_threat,tstart,tend,duration,clientip,respcode,respcodename
                FROM {0}.proxy_timeline
                WHERE
                    y={1} AND m={2} AND d={3} AND p_threat != '{4}'
                """).format(db,date.year,date.month,date.day,escaped_anchor)

    # Must run before the partition folder is deleted below.
    tmp_timeline_data = ImpalaEngine.execute_query_as_list(time_line_query)

    imp_query = ("""
                        INSERT INTO TABLE {0}.proxy_timeline
                        PARTITION (y={2}, m={3},d={4})
                        SELECT
                            '{7}' as p_threat, concat(cast(p_date as string),
                            ' ', cast(MIN(p_time) as string)) AS tstart,
                            concat(cast(p_date as string), ' ',
                            cast(MAX(p_time) as string)) AS tend,
                            SUM(duration) AS duration,
                            clientip, respcode,"respCodeName" as respCodeName
                        FROM {0}.proxy
                        WHERE fulluri='{1}' AND clientip IN ({5})
                        AND y='{2}' AND m='{3}' AND d='{4}'
                        GROUP BY clientip, p_time, respcode, p_date
                        LIMIT {6}
                    """)\
                    .format(db,escaped_anchor,date.year,str(date.month).zfill(2),\
                    str(date.day).zfill(2),("'" + "','".join(susp_ips) + "'")\
                    ,top_results,escaped_anchor)

    app_path = Configuration.spot()
    old_file = "{0}/proxy/hive/oa/timeline/y={1}/m={2}/d={3}"\
    .format(app_path,date.year,date.month,date.day)

    HDFSClient.delete_folder(old_file,"impala")
    ImpalaEngine.execute_query("invalidate metadata")

    # Re-insert the rows that belonged to other threats.
    for item in tmp_timeline_data:
        insert_query = ("""
                        INSERT INTO {0}.proxy_timeline PARTITION(y={1} , m={2} ,d={3})
                        VALUES ('{4}', '{5}', '{6}',{7},'{8}','{9}','{10}')
                        """)\
                        .format(db,date.year,date.month,date.day,\
                        item["p_threat"],item["tstart"],item["tend"],item["duration"],item["clientip"],item["respcode"],item["respcodename"])

        ImpalaEngine.execute_query(insert_query)

    ImpalaEngine.execute_query(imp_query)
    return "Timeline successfully saved"
Ejemplo n.º 52
0
def score_request(date,score,uri):
    """Score every proxy request of one day whose full URI equals ``uri``.

    Matching rows of ``proxy_scores`` are inserted, with ``score``, into
    ``proxy_threat_investigation`` and written to the day's
    ``ml_feedback.csv`` file.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        score: Value stored with every matching request.
        uri: Full URI to match exactly.

    Returns:
        None when both ``score`` and ``uri`` are empty, True otherwise.
    """
    # hashlib replaces the md5 module, which no longer exists on Python 3.
    import hashlib

    if not score and not uri:
        return None

    db = Configuration.db()
    p_query = ("""
		SELECT
		    tdate,time,clientip,host,reqmethod,useragent,resconttype
		    ,duration,username,webcat,referer,respcode,uriport
		    ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
		    ,word,ml_score,uri_rep,respcode_name,network_context
		FROM
		    {0}.proxy_scores
		WHERE
		    y={1} and m={2} and d={3}
		    AND fulluri = '{4}'
		""").format(db,date.year,date.month,date.day,uri)

    connections = ImpalaEngine.execute_query(p_query)

    # add score to connections
    insert_command = ("""
		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
		VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): this reads the clientip column again; the split on
        # ":" below suggests the time column (row[1]) was meant. Left as-is
        # because changing it changes the hash value -- confirm.
        tme_index = row[2]

        # md5 needs bytes; text is encoded only where str is not bytes.
        digest_input = str(cip_index) + str(uri_index)
        if not isinstance(digest_input, bytes):
            digest_input = digest_input.encode("utf-8")
        hash_field = [str(hashlib.md5(digest_input).hexdigest()
                          + str((tme_index.split(":"))[0]))]

        threat_data = (row[0],row[18],score)
        fb_data.append([row[0],row[1],row[2],row[3],row[4],row[5],row[6],row[7],
                        row[8],row[9],row[10],row[11],row[12],row[13],row[14],
                        row[15],row[16],row[17],row[18],row[19],score,row[20],
                        row[21],row[22],row[23],hash_field])
        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # Empty listing: start the file with a header row and do not append.
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",
            "useragent","resconttype","duration","username","webcat","referer",
            "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",
            "fulluri","word","score","uri_rep","uri_sev","respcode_name",
            "network_context","hash"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True
Ejemplo n.º 53
0
def score_connection(date,ip="", dns="", ip_sev=0, dns_sev=0):
    """Score the DNS records of one day that match an IP and/or a query name.

    Matching rows of ``dns_scores`` are inserted, with the severities, into
    ``dns_threat_investigation`` and written to the day's
    ``ml_feedback.csv`` file.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Destination IP to match (``ip_dst``); empty to skip.
        dns: Query name to match (``dns_qry_name``); empty to skip.
        ip_sev: Severity stored for rows whose ``ip_dst`` equals ``ip``.
        dns_sev: Severity stored for rows whose ``dns_qry_name`` equals
            ``dns``.

    Returns:
        False when neither ``ip`` nor ``dns`` is given, True otherwise.
    """
    # Without ip or dns the WHERE clause would end in an empty "AND ( )",
    # which is not a valid statement, so a severity alone is not enough.
    if not ip and not dns:
        return False

    db = Configuration.db()
    sq_query = ("""
		SELECT
    	    frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
		    dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
		    hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
		    network_context
		FROM
		    {0}.dns_scores
		WHERE
		    y={1} and m={2} and d={3}
            AND (
		""").format(db,date.year,date.month,date.day)

    conditions = []
    if ip:
        conditions.append("ip_dst = '{0}' ".format(ip))
    if dns:
        conditions.append("dns_qry_name = '{0}' ".format(dns))
    connections_filter = " OR ".join(conditions) + ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)

    # add score to connections
    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
                        PARTITION (y={1},m={2},d={3})
                        VALUES (""") \
                        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation; a severity is only stored
        # for the field that actually matched, 0 otherwise.
        threat_data = (row[1],row[3],row[4],ip_sev if ip == row[3] else 0,
                       dns_sev if dns == row[4] else 0)

        fb_data.append([row[0],row[2],row[3],row[4],row[5],row[6],row[7],
                        row[8],row[9],row[10],row[11],ip_sev,dns_sev,row[12],
                        row[13],row[14],row[15],row[1]])

        insert_command += "{0}{1}".format("," if not first else "", threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    ap_file = True

    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # Empty listing: start the file with a header row and do not append.
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",
            "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",
            "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",
            "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,feedback_path,"ml_feedback.csv",append_file=ap_file)
    return True
Ejemplo n.º 54
0
def score_connection(date, ip="", dns="", ip_sev=0, dns_sev=0):
    """Score the DNS records of one day that match an IP and/or a query name.

    Matching rows of ``dns_scores`` are inserted, with the severities, into
    ``dns_threat_investigation`` and written to the day's
    ``ml_feedback.csv`` file.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        ip: Destination IP to match (``ip_dst``); empty to skip.
        dns: Query name to match (``dns_qry_name``); empty to skip.
        ip_sev: Severity stored for rows whose ``ip_dst`` equals ``ip``.
        dns_sev: Severity stored for rows whose ``dns_qry_name`` equals
            ``dns``.

    Returns:
        False when neither ``ip`` nor ``dns`` is given, True otherwise.
    """
    # A severity without ip/dns used to pass the guard and produce a WHERE
    # clause ending in an empty "AND ( )"; reject that case up front.
    if not ip and not dns:
        return False

    db = Configuration.db()
    sq_query = ("""
		SELECT
    	    frame_time,unix_tstamp,frame_len,ip_dst,dns_qry_name,dns_qry_class,
		    dns_qry_type,dns_qry_rcode,ml_score,tld,query_rep,
		    hh,dns_qry_class_name,dns_qry_type_name,dns_qry_rcode_name,
		    network_context
		FROM
		    {0}.dns_scores
		WHERE
		    y={1} and m={2} and d={3}
            AND (
		""").format(db, date.year, date.month, date.day)

    conditions = []
    if ip:
        conditions.append("ip_dst = '{0}' ".format(ip))
    if dns:
        conditions.append("dns_qry_name = '{0}' ".format(dns))
    connections_filter = " OR ".join(conditions) + ")"
    connections = ImpalaEngine.execute_query(sq_query + connections_filter)

    # add score to connections
    insert_command = ("""INSERT INTO {0}.dns_threat_investigation
                        PARTITION (y={1},m={2},d={3})
                        VALUES (""") \
                        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        # insert into dns_threat_investigation; a severity is only stored
        # for the field that actually matched, 0 otherwise.
        threat_data = (row[1], row[3], row[4],
                       ip_sev if ip == row[3] else 0,
                       dns_sev if dns == row[4] else 0)

        fb_data.append([row[0], row[2], row[3], row[4], row[5], row[6],
                        row[7], row[8], row[9], row[10], row[11],
                        ip_sev, dns_sev,
                        row[12], row[13], row[14], row[15], row[1]])

        insert_command += "{0}{1}".format("," if not first else "",
                                          threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/dns/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))
    ap_file = True

    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # Empty listing: start the file with a header row and do not append.
        fb_data.insert(0,["frame_time","frame_len","ip_dst","dns_qry_name",
            "dns_qry_class","dns_qry_type","dns_qry_rcode","score","tld","query_rep",
            "hh","ip_sev","dns_sev","dns_qry_class_name","dns_qry_type_name",
            "dns_qry_rcode_name","network_context","unix_tstamp"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,
                            feedback_path,
                            "ml_feedback.csv",
                            append_file=ap_file)
    return True
Ejemplo n.º 55
0
def score_request(date, score, uri):
    """Score every proxy request of one day whose full URI equals ``uri``.

    Matching rows of ``proxy_scores`` are inserted, with ``score``, into
    ``proxy_threat_investigation`` and written to the day's
    ``ml_feedback.csv`` file.

    Args:
        date: Object exposing ``year``, ``month`` and ``day``.
        score: Value stored with every matching request.
        uri: Full URI to match exactly.

    Returns:
        None when both ``score`` and ``uri`` are empty, True otherwise.
    """
    # hashlib replaces the md5 module, which no longer exists on Python 3.
    import hashlib

    if not score and not uri:
        return None

    db = Configuration.db()
    p_query = ("""
		SELECT
		    tdate,time,clientip,host,reqmethod,useragent,resconttype
		    ,duration,username,webcat,referer,respcode,uriport
		    ,uripath,uriquery,serverip,scbytes,csbytes,fulluri
		    ,word,ml_score,uri_rep,respcode_name,network_context
		FROM
		    {0}.proxy_scores
		WHERE
		    y={1} and m={2} and d={3}
		    AND fulluri = '{4}'
		""").format(db, date.year, date.month, date.day, uri)

    connections = ImpalaEngine.execute_query(p_query)

    # add score to connections
    insert_command = ("""
		INSERT INTO {0}.proxy_threat_investigation PARTITION (y={1},m={2},d={3})
		VALUES (""") \
        .format(db,date.year,date.month,date.day)

    fb_data = []
    first = True
    num_rows = 0
    for row in connections:
        cip_index = row[2]
        uri_index = row[18]
        # NOTE(review): this reads the clientip column again; the split on
        # ":" below suggests the time column (row[1]) was meant. Left as-is
        # because changing it changes the hash value -- confirm.
        tme_index = row[2]

        # md5 needs bytes; text is encoded only where str is not bytes.
        digest_input = str(cip_index) + str(uri_index)
        if not isinstance(digest_input, bytes):
            digest_input = digest_input.encode("utf-8")
        hash_field = [str(hashlib.md5(digest_input).hexdigest()
                          + str((tme_index.split(":"))[0]))]

        threat_data = (row[0], row[18], score)
        fb_data.append([row[0], row[1], row[2], row[3], row[4], row[5],
                        row[6], row[7], row[8], row[9], row[10], row[11],
                        row[12], row[13], row[14], row[15], row[16], row[17],
                        row[18], row[19], score, row[20], row[21], row[22],
                        row[23], hash_field])
        insert_command += "{0}{1}".format("," if not first else "",
                                          threat_data)
        first = False
        num_rows += 1

    insert_command += ")"
    if num_rows > 0:
        ImpalaEngine.execute_query(insert_command)

    # create feedback file.
    app_path = Configuration.spot()
    feedback_path = "{0}/proxy/scored_results/{1}{2}{3}/feedback"\
    .format(app_path,date.year,str(date.month).zfill(2),str(date.day).zfill(2))

    ap_file = True
    if len(HDFSClient.list_dir(feedback_path)) == 0:
        # Empty listing: start the file with a header row and do not append.
        fb_data.insert(0,["p_date","p_time","clientip","host","reqmethod",
           "useragent","resconttype","duration","username","webcat","referer",
           "respcode","uriport","uripath","uriquery","serverip","scbytes","csbytes",
           "fulluri","word","score","uri_rep","uri_sev","respcode_name",
           "network_context","hash"])
        ap_file = False

    HDFSClient.put_file_csv(fb_data,
                            feedback_path,
                            "ml_feedback.csv",
                            append_file=ap_file)
    return True
Ejemplo n.º 56
0
def create_connection():
    """Open an Impala connection and return a cursor on it.

    Host and port come from ``Config.impala()`` and the database name from
    ``Config.db()``.

    Returns:
        The result of calling ``cursor()`` on the new connection.
    """
    host, port = Config.impala()
    database = Config.db()
    connection = connect(
        host=host,
        port=int(port),  # connect() is given the port as an int
        database=database,
    )
    return connection.cursor()