def find_blacklisted_ipvoid_mp(arglist):
    global CURR_DONE
    global TOTAL_TO_DO

    check_list, customer, result_type = arglist

    # Get the destination ip and the list of sources that connected to it
    dst = check_list[0]
    src_list = check_list[1]

    # Report progress
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value
    if (local_curr_done % 10 == 0) or (local_curr_done == TOTAL_TO_DO.value):
        progress_bar(local_curr_done, TOTAL_TO_DO.value)

    # Query the IPVoid scan page for the destination ip
    try:
        response = urllib2.urlopen('http://www.ipvoid.com/scan/' + dst)
    except:
        return
    html = response.read().decode('utf-8')

    # Record how many blacklist engines flagged the ip, once per source
    if 'BLACKLISTED' in html:
        line_splt = html.split('BLACKLISTED ')
        times_seen = int(line_splt[1].split('/')[0])
        for src in src_list:
            write_data([src, dst, times_seen], customer, result_type)
    response.close()
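# --------------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline): how the worker above pulls
# the detection count out of an IPVoid scan page. The HTML fragment in the
# usage note is a made-up example of the markup the scraper assumes; the
# live page layout may differ.
def _example_parse_ipvoid_count(html):
    # The text following 'BLACKLISTED ' looks like '7/92'; the numerator is
    # the number of blacklist engines that flagged the address.
    if 'BLACKLISTED' in html:
        return int(html.split('BLACKLISTED ')[1].split('/')[0])
    return 0

# _example_parse_ipvoid_count('<span>BLACKLISTED 7/92</span>') -> 7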
def find_long_urls(customer, threshold, result_type):
    # searching for urls in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, URL]

    # restrict results to specified customer
    constraints = []

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000
    scrolling = True

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    url_dict = {}
    count = 0
    error_count = 0
    while scrolling:
        # Retrieve data, which comes in sorted by longest entry for the url field
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len)
        progress_bar(count, scroll_size)
        for i in hits:
            count += 1
            try:
                url = i['fields'][URL][0]
                data = i['fields']
            except:
                error_count += 1
                continue
            key = len(url)
            # If the key already exists, append the data; otherwise create a
            # new key holding a list with the data
            if key in url_dict:
                url_dict[key].append(data)
            else:
                url_dict[key] = [data]
        if len(hits) < 1:
            scrolling = False

    # Get total number of keys (unique url lengths)
    total_keys = len(url_dict)

    # Verify that the ES query actually returned some results
    if not total_keys == 0:
        print('>>> Finding the longest URLs... ')
        final_res = []
        key_count = 0
        keys = sorted(url_dict.keys(), reverse=True)
        done = False
        # Collect up to `threshold` of the longest urls
        for url_length in keys:
            if done:
                break
            for entry in url_dict[url_length]:
                if (key_count % 10 == 0) or (key_count == threshold):
                    progress_bar(key_count, threshold)
                key_count += 1
                if key_count > threshold:
                    done = True
                    break
                else:
                    final_res.append(entry)

        # Write the results of the analysis
        write_count = 0
        write_total = len(final_res)
        print('>>> Writing results of analysis...')
        for data in final_res:
            write_count += 1
            if (write_count % 10 == 0) or (write_count == write_total):
                progress_bar(write_count, write_total)
            write_data(data, customer, result_type)
    else:
        print(colors.bcolors.WARNING +
              '[!] Querying elasticsearch failed - Verify your log configuration file! [!]' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped! [!]' +
              colors.bcolors.ENDC)
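# --------------------------------------------------------------------------
# Illustrative sketch of the selection step above: bucket records by URL
# length, then walk the buckets from longest to shortest until `threshold`
# records have been collected. The 'url' key is a hypothetical stand-in for
# the elasticsearch hit structure.
def _example_longest_urls(records, threshold):
    buckets = {}
    for rec in records:
        buckets.setdefault(len(rec['url']), []).append(rec)
    picked = []
    for length in sorted(buckets, reverse=True):
        for rec in buckets[length]:
            if len(picked) >= threshold:
                return picked
            picked.append(rec)
    return picked

# _example_longest_urls([{'url': 'a'}, {'url': 'abcd'}, {'url': 'ab'}], 2)
# returns the 'abcd' and 'ab' records.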
def beacon_analysis(customer, proto, result_type):
    global TOTAL_TO_DO
    global CURR_DONE
    global TIME_DICT

    CURR_DONE.value = 0
    worker_pool = Pool(processes=None, maxtasksperchild=1)

    # searching for beacons in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, PROTOCOL, TIMESTAMP]

    # restrict results to the specified protocol, if one was given
    if proto != "":
        constraints = [{PROTOCOL: proto}]
    else:
        constraints = []

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000
    scrolling = True

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch and building a dictionary... ')

    # start index for progress bar
    count = 0
    error_count = 0

    # Build a dictionary for beacon detection
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len)
        for entry in hits:
            count += 1
            try:
                # create dictionary key
                key = (entry['fields'][SOURCE_IP][0],
                       entry['fields'][DESTINATION_IP][0],
                       entry['fields'][DESTINATION_PORT][0])
                # append timestamp to dictionary under unique key
                dt = dt_parser.parse(entry['fields'][TIMESTAMP][0])
                ts = time.mktime(dt.timetuple())
                TIME_DICT[key].append(int(ts))
            except:
                error_count += 1
                continue

        # Report progress
        progress_bar(count, scroll_size)

        # stop scrolling if no more hits
        if count == scroll_size:
            scrolling = False

    if not (len(TIME_DICT) == 0):
        # parallelize the fft analysis
        m = Manager()
        db_queue = m.Queue()
        n_cores = multiprocessing.cpu_count()
        print('>>> Found ' + str(n_cores) + ' core(s)!')

        # create parameter list for the workers
        arglist = []
        for key in TIME_DICT:
            arglist.append((key, db_queue))

        # determine the total number of keys to be split up amongst the workers
        TOTAL_TO_DO.value = len(arglist)

        # run the fft mapping
        print('>>> Running beacon analysis... ')
        worker_pool.map(perform_fft_mp, iterable=arglist, chunksize=1000)

        # Write results to elasticsearch
        while not db_queue.empty():
            try:
                vals = db_queue.get()
            except:
                break
            write_data(vals, customer, proto, result_type)
    else:
        print(colors.bcolors.WARNING +
              '[!] Querying elasticsearch failed - Verify your log configuration file!' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped!' +
              colors.bcolors.ENDC)
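# --------------------------------------------------------------------------
# Illustrative sketch of the worker/queue pattern beacon_analysis relies on:
# a Manager queue is shared with Pool workers, each worker pushes a result
# tuple, and the parent drains the queue afterwards. The _example_square
# worker is hypothetical; the real worker is perform_fft_mp. Relies on the
# module's existing Pool/Manager imports.
def _example_square(args):
    value, queue = args
    queue.put((value, value * value))

def _example_pool_with_queue():
    m = Manager()
    q = m.Queue()
    pool = Pool(processes=2)
    pool.map(_example_square, [(i, q) for i in range(5)])
    pool.close()
    pool.join()
    results = []
    while not q.empty():
        results.append(q.get())
    return results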
def find_beacons_graph(customer, proto, category, save_dir):
    # Make a directory to store the graphs
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # graphs are built from previously written results, not raw logs
    doc_type = 'results'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, 'min_hz', 'max_hz',
              TIMESTAMP]

    # restrict results to the specified protocol and result type
    if proto != "":
        constraints = [{PROTOCOL: proto}, {'result_type': category}]
        proto_temp = proto
    else:
        constraints = [{'result_type': category}]
        proto_temp = "All Protocols"

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000
    scrolling = True

    print('>>> Retrieving information from elasticsearch...')

    # start index for results
    count = 0
    error_count = 0

    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len)
        for entry in hits:
            count += 1
            progress_bar(count, scroll_size)
            try:
                src = entry['fields'][SOURCE_IP][0]
                dst = entry['fields'][DESTINATION_IP][0]
                dpt = entry['fields'][DESTINATION_PORT][0]
                min_hz = entry['fields']['min_hz'][0]
                max_hz = entry['fields']['max_hz'][0]
            except:
                error_count += 1
                continue

            times = get_datetimes(src, dst, dpt, customer, proto)
            # Skip connections with too few data points to graph
            if not len(times) > 10:
                continue
            span = times[-1] - times[0]
            if span > 0:
                # Rebase timestamps to seconds since the first connection
                n_times = len(times)
                for idx in range(1, n_times):
                    times[idx] = times[idx] - times[0]
                times[0] = 0

                # Bin the connections into one-second buckets
                n = scipy.zeros(times[-1] + 1)
                for time_idx in times:
                    n[time_idx] += 1

                fig = Figure()
                sub_fig = fig.add_subplot(111)

                # Plot at most the first six hours of activity
                span_6_hours = min([len(n), 21600])
                times_6_hours = n[:span_6_hours]

                sample_sz = len(n)
                k = scipy.arange(sample_sz)
                freq = k / float(sample_sz)
                freq = freq[:sample_sz // 2]
                Y = abs(np.fft.rfft(n) / sample_sz)
                Y = Y[:sample_sz // 2]

                # Zero out the lowest-frequency bins
                zero_len = min([len(Y), 10])
                for idx in range(zero_len):
                    Y[idx] = 0

                # Restrict the spectrum to the stored frequency range
                curr_min_range = int((len(Y) / 0.5) * min_hz + 0.5)
                curr_max_range = int((len(Y) / 0.5) * max_hz + 0.5)
                Y = Y[curr_min_range:curr_max_range]
                freq = freq[curr_min_range:curr_max_range]

                # Histogram of connection attempts over time
                canvas = FigureCanvas(fig)
                sub_fig.plot(times_6_hours)
                sub_fig.set_title(category + ' (histogram)--Customer: ' +
                                  customer + '\nSrc: ' + src + ' Dest: ' + dst +
                                  ' Proto: ' + proto_temp + ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Time Stamp (UNIT)')
                sub_fig.set_ylabel('Connection Attempts')
                P.gca().set_ylim(ymax=10)
                canvas.print_figure(save_dir + 'Src-' + src.replace('.', '_') +
                                    '_Dst-' + dst.replace('.', '_') + '_' +
                                    proto_temp + '_' + dpt + '_minhz-' +
                                    str(min_hz) + '_maxhz-' + str(max_hz) +
                                    '_' + customer + '_histb.png')
                P.close(fig)
                sub_fig.clear()

                # FFT of the same connection
                fig = Figure()
                canvas = FigureCanvas(fig)
                sub_fig = fig.add_subplot(111)
                sub_fig.plot(freq, abs(Y), '--')
                sub_fig.set_title(category + ' (FFT)--Customer: ' + customer +
                                  '\nSrc: ' + src + ' Dest: ' + dst +
                                  ' Proto: ' + proto_temp + ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Freq (HZ)')
                sub_fig.set_ylabel('|Y(FREQ)|')
                canvas.print_figure(save_dir + 'Src-' + src.replace('.', '_') +
                                    '_Dst-' + dst.replace('.', '_') + '_' +
                                    proto_temp + '_' + dpt + '_minhz-' +
                                    str(min_hz) + '_maxhz-' + str(max_hz) +
                                    '_' + customer + '_fft.png')
                P.close(fig)

        if len(hits) < 1:
            scrolling = False

    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' results entries with misnamed or missing field values skipped!' +
              colors.bcolors.ENDC)
    print(colors.bcolors.OKGREEN + '[+] Finished generating graphs ' + '[+]' +
          colors.bcolors.ENDC)
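# --------------------------------------------------------------------------
# Illustrative sketch of the object-oriented matplotlib pattern used above:
# build a Figure, attach an Agg canvas, plot, and write the PNG directly via
# print_figure. The output path is a placeholder; relies on the module's
# existing Figure/FigureCanvas imports.
def _example_write_plot(values, out_path='example_plot.png'):
    fig = Figure()
    canvas = FigureCanvas(fig)
    sub_fig = fig.add_subplot(111)
    sub_fig.plot(values)
    sub_fig.set_xlabel('Sample')
    sub_fig.set_ylabel('Value')
    canvas.print_figure(out_path)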
def perform_fft_mp(arglist):
    """
    Use a fourier transform to look for beacons in the connection specified in
    arglist, and queue the per-connection max/average ratios for the database
    writer.
    """
    global CURR_DONE
    global UNLIKELY_CURR
    global TOTAL_TO_DO
    global TIME_DICT
    global CURR_DONE_LOCK

    key, db_queue = arglist

    # Mutex lock to update the number of items completed so far
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value

    # Draw a progress bar
    if (local_curr_done % 1000 == 0) or (local_curr_done == TOTAL_TO_DO.value):
        progress_bar(local_curr_done, TOTAL_TO_DO.value)

    src = key[0]  # Source IP
    dst = key[1]  # Destination IP
    dpt = key[2]  # Destination Port

    # Return if the sample size is too small
    if len(TIME_DICT[key]) < 10:
        return None

    # Sort the list of timestamps for this connection
    ts = sorted(TIME_DICT[key])

    # Make sure the last timestamp is greater than the first
    if 0 < (ts[-1] - ts[0]):
        # Change the timestamps from seconds since the epoch to seconds since
        # timestamp_0
        for idx in range(1, len(ts)):
            ts[idx] = ts[idx] - ts[0]
        ts[0] = 0

        # Create an array of seconds from 0 to the greatest timestamp
        n = scipy.zeros(ts[-1] + 1)

        # For each timestamp, increment the count for that particular second
        # in the n array
        for time_idx in ts:
            n[time_idx] += 1

        sample_sz = len(n)

        # Create a range of numbers, 0 to the length of n
        k = scipy.arange(sample_sz)

        # Create a list of frequencies by dividing each element in k
        # by the length of k... ie k=1 -> freq=1/sample_sz
        freq = k / float(sample_sz)

        # Only look at the first half of the frequency range
        freq = freq[:sample_sz // 2]

        # Run a Fast Fourier Transform on the sample and only keep the
        # positive frequencies from 0 to half the sample size
        Y = abs(np.fft.rfft(n) / sample_sz)
        Y = Y[:sample_sz // 2]

        # Get rid of the lowest frequencies...
        zero_len = min([len(Y), 10])
        for idx in range(zero_len):
            Y[idx] = 0

        mar_vals = ()
        for mar_name, min_hz, max_hz in MAR_NAMES_LIST:
            if len(Y) <= 1:
                return None

            # Determine the range of frequencies to examine
            curr_min_range = int((len(Y) / 0.5) * min_hz + 0.5)
            curr_max_range = int((len(Y) / 0.5) * max_hz + 0.5)
            tmp_Y = Y[curr_min_range:curr_max_range]
            if len(tmp_Y) <= 1:
                return None

            # Determine the average and max value for frequencies in
            # the desired range
            fft_avg = np.mean(tmp_Y)
            y_max = np.amax(tmp_Y)
            if fft_avg <= 0:
                return None

            # Save the max/average ratio for the frequency range
            max_avg_ratio = y_max / fft_avg
            mar_vals += (max_avg_ratio, )

        ret_vals = (src, dst, dpt) + mar_vals
        db_queue.put(ret_vals)
    return None
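# --------------------------------------------------------------------------
# Illustrative sketch of the heuristic perform_fft_mp implements: a
# connection that fires every N seconds produces a sharp spike in the FFT of
# its per-second hit counts, so the max/average ratio over a frequency band
# is large. The 60-second period, one-hour duration, and band edges are
# made-up example values, not the MAR_NAMES_LIST settings.
def _example_max_avg_ratio(period=60, duration=3600, min_hz=0.005, max_hz=0.05):
    counts = np.zeros(duration)
    counts[::period] = 1              # one connection every `period` seconds
    spectrum = abs(np.fft.rfft(counts) / duration)[:duration // 2]
    for idx in range(min([len(spectrum), 10])):
        spectrum[idx] = 0             # drop the lowest bins, as above
    lo = int((len(spectrum) / 0.5) * min_hz + 0.5)
    hi = int((len(spectrum) / 0.5) * max_hz + 0.5)
    band = spectrum[lo:hi]
    return np.amax(band) / np.mean(band)

# A strongly periodic series yields a ratio well above 1; irregular traffic
# gives a much smaller one.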
def find_cross_analysis(customer, result_type):
    # Search will be conducted over previously written results
    doc_type = 'results'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, 'result_type']

    # restrict results to specified customer
    constraints = []

    # anything we want to filter out
    ignore = []

    sort = ""

    # create dictionary to store the behaviors seen for each source ip
    crossref_dict = defaultdict(list)

    scroll_id = ""
    scrolling = True
    scroll_len = 1000
    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len, sort)

        # For every source ip, build the list of malicious behaviors it was
        # flagged with
        for entry in hits:
            try:
                src_list = entry['fields'][SOURCE_IP]
                behavior = entry['fields']['result_type'][0]
                if behavior not in CROSSREF_BEHAVIORS:
                    continue
            except:
                error_count += 1
                continue
            for src in src_list:
                # Record only unique behaviors per source ip
                if behavior not in crossref_dict[src]:
                    crossref_dict[src].append(behavior)

        # stop scrolling if no more hits
        if len(hits) < 1:
            scrolling = False
        else:
            count += len(hits)
            # Report progress
            if (count % 10 == 0) or (count == scroll_size):
                progress_bar(count, scroll_size)

    crossref_dict_len = len(crossref_dict)
    if not (crossref_dict_len == 0):
        num_found = 0
        print('>>> Performing cross-analysis and writing results to elasticsearch... ')

        # Record all source ips with multiple behaviors
        count = 0
        for src in sorted(crossref_dict,
                          key=lambda src: len(crossref_dict[src]),
                          reverse=True):
            # Report progress
            count += 1
            progress_bar(count, crossref_dict_len)
            if len(crossref_dict[src]) > 1:
                num_found += 1
                write_data(src, crossref_dict[src], customer, result_type)
        print(colors.bcolors.WARNING + '[+] ' + str(num_found) +
              ' source IPs with multiple malicious behaviors found! [+]' +
              colors.bcolors.ENDC)
    else:
        print(colors.bcolors.WARNING +
              '\nQuerying elasticsearch failed - Verify that you have run the other modules first!' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped! [!]' +
              colors.bcolors.ENDC)
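# --------------------------------------------------------------------------
# Illustrative sketch of the cross-analysis step above: collect the set of
# result types seen per source ip and keep only the sources flagged for more
# than one behavior. The behavior names are hypothetical examples.
def _example_cross_reference(results):
    by_src = {}
    for src, behavior in results:
        by_src.setdefault(src, set()).add(behavior)
    return dict((src, sorted(kinds)) for src, kinds in by_src.items()
                if len(kinds) > 1)

# _example_cross_reference([('10.0.0.5', 'beaconing'),
#                           ('10.0.0.5', 'port_scan'),
#                           ('10.0.0.9', 'beaconing')])
# -> {'10.0.0.5': ['beaconing', 'port_scan']}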
def find_concurrent(customer, result_type):
    # Search will be conducted in log files
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [EVENT_ID, USER_NAME, SOURCE_IP, TIMESTAMP]

    # restrict results to specified customer and eventId to list of possible IDs
    constraints = []

    # anything we want to filter out
    ignore = []

    # Sort results by timestamp
    sort = TIMESTAMP + ':asc'

    # create dictionary to store user login info
    concurrent_dict = defaultdict(dict)

    scroll_id = ""
    scrolling = True
    scroll_len = 1000
    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len, sort)

        # For every unique username (used as dict key), make a dictionary of
        # event activity
        for entry in hits:
            try:
                user = entry['fields'][USER_NAME][0]
                event = entry['fields'][EVENT_ID][0]
            except:
                error_count += 1
                continue

            # If the user name has not been added to the dictionary yet,
            # initialize its login counts to 0
            if user not in concurrent_dict:
                concurrent_dict[user]['logged_on'] = False
                concurrent_dict[user]['concurrent'] = 0
                concurrent_dict[user]['max_concurrent'] = 0
                concurrent_dict[user]['src_list'] = []

            try:
                src = entry['fields'][SOURCE_IP][0]
            except:
                src = None

            # Add only unique source ips
            if src not in concurrent_dict[user]['src_list']:
                concurrent_dict[user]['src_list'].append(src)

            # If the event id indicates a logon, mark the user as logged on
            # and add to the concurrent count if the user is already logged on
            if event == LOG_ON:
                if concurrent_dict[user]['logged_on']:
                    concurrent_dict[user]['concurrent'] += 1
                    if concurrent_dict[user]['max_concurrent'] < concurrent_dict[user]['concurrent']:
                        concurrent_dict[user]['max_concurrent'] = concurrent_dict[user]['concurrent']
                else:
                    concurrent_dict[user]['logged_on'] = True

            # If the event id indicates a logoff, reduce the concurrent count
            # and, if the concurrent count is now zero, mark the user as
            # logged off
            elif (event == LOG_OFF) or (event == LOG_OFF2):
                if 0 < concurrent_dict[user]['concurrent']:
                    concurrent_dict[user]['concurrent'] -= 1
                    if concurrent_dict[user]['concurrent'] == 0:
                        concurrent_dict[user]['logged_on'] = False

        # stop scrolling if no more hits
        if len(hits) < 1:
            scrolling = False
        else:
            count += len(hits)
            # Report progress
            if (count % 10 == 0) or (count == scroll_size):
                progress_bar(count, scroll_size)

    if not (len(concurrent_dict) == 0):
        num_found = 0
        print('>>> Checking for concurrent logins and writing results to elasticsearch... ' +
              colors.bcolors.ENDC)

        # record all users with concurrent logins
        for user, data in concurrent_dict.iteritems():
            if data['max_concurrent'] > 0:
                num_found += 1
                write_data(user, data, customer, result_type)
        print(colors.bcolors.WARNING + '[+] ' + str(num_found) +
              ' concurrent logins found! [+]' + colors.bcolors.ENDC)
    else:
        print(colors.bcolors.WARNING +
              '\nQuerying elasticsearch failed - Verify your log configuration file!' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped! [!]' +
              colors.bcolors.ENDC)
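# --------------------------------------------------------------------------
# Illustrative sketch of the logon/logoff state machine above: each logon
# while already logged on raises the concurrency count, each logoff lowers
# it, and the peak value is what gets reported. The 'logon'/'logoff' strings
# are stand-ins for the Windows event IDs in LOG_ON/LOG_OFF/LOG_OFF2.
def _example_max_concurrent(events):
    logged_on, concurrent, max_concurrent = False, 0, 0
    for event in events:
        if event == 'logon':
            if logged_on:
                concurrent += 1
                max_concurrent = max(max_concurrent, concurrent)
            else:
                logged_on = True
        elif event == 'logoff' and concurrent > 0:
            concurrent -= 1
            if concurrent == 0:
                logged_on = False
    return max_concurrent

# _example_max_concurrent(['logon', 'logon', 'logon', 'logoff']) -> 2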
def find_blacklisted_ipvoid(customer, result_type):
    global CURR_DONE
    global TOTAL_TO_DO

    CURR_DONE.value = 0

    # Analysis will be done on log files, not results
    doc_type = 'logs'

    # restrict results to specified customer
    constraints = []

    # anything we want to filter out
    ignore = []

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP]

    scroll_id = ""
    scroll_len = 1000
    scrolling = True
    count = 0
    error_count = 0

    # build dictionary for blacklist detection
    blacklist_dict = defaultdict(list)
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len)

        # For every unique destination ip (used as dict key), make a list of
        # all source ips that connected to it
        for entry in hits:
            count += 1
            try:
                dst = entry['fields'][DESTINATION_IP][0]
                src = entry['fields'][SOURCE_IP][0]
            except:
                error_count += 1
                continue

            # Verify that the source ip is internal and that the destination
            # ip is external
            if len(dst) != 0 and len(src) != 0:
                if (filter_ip(src) == False) and (filter_ip(dst) == True):
                    # Check for duplicate source IPs
                    try:
                        if src not in blacklist_dict[dst]:
                            blacklist_dict[dst].append(src)
                    except:
                        continue

        if len(hits) < 1:
            scrolling = False
        else:
            progress_bar(count, scroll_size)

    # Get total number of keys (unique destination ips)
    total_keys = len(blacklist_dict)

    # Verify that the ES query actually returned some results
    if not total_keys == 0:
        print('>>> Querying blacklist....')

        # Get the multiprocessing pool ready
        TOTAL_TO_DO.value = len(blacklist_dict)
        workers = Pool(64)

        # create parameter list for the workers
        arglist = [(entry, customer, result_type)
                   for entry in blacklist_dict.items()]
        workers.map(find_blacklisted_ipvoid_mp, arglist)
    else:
        print(colors.bcolors.WARNING +
              '[!] Querying elasticsearch failed - Verify your log configuration file! [!]' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped! [!]' +
              colors.bcolors.ENDC)
def scan_analysis(customer, proto, threshold, graph, graph_thresh,
                  potential_save_dir, result_type):
    # Search will be conducted in log files
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT]

    # restrict results to the specified protocol, if one was given
    if proto != "" and proto != 'web':
        constraints = [{PROTOCOL: proto}]
    else:
        constraints = []

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000
    scrolling = True
    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch and building dictionary...')

    # build dictionary for scan detection
    scan_dict = defaultdict(list)
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,
                                                        fields, constraints,
                                                        ignore, scroll_id,
                                                        scroll_len)

        # Report progress
        if (count % 10 == 0) or (count == scroll_size):
            progress_bar(count, scroll_size)

        for entry in hits:
            count += 1
            try:
                # Get source ip, destination ip, and port of the current log entry
                src = entry['fields'][SOURCE_IP][0]
                dst = entry['fields'][DESTINATION_IP][0]
                dpt = entry['fields'][DESTINATION_PORT][0]
                if dpt == '':
                    error_count += 1
                    continue
            except:
                error_count += 1
                continue

            # Set up the dictionary key as a source and destination ip pair
            key = (src, dst)

            # Add all destination ports
            scan_dict[key].append(dpt)

        if len(hits) < 1:
            scrolling = False

    # Get total number of keys (unique source - destination pairs)
    total_keys = len(scan_dict)
    if not total_keys == 0:
        print('>>> Running scan analysis ... ')
        key_count = 0
        unlikely_found = 0
        likely_found = 0

        # Iterate over all the keys...
        for key in scan_dict:
            key_count += 1
            if (key_count % 20 == 0) or (key_count == total_keys):
                progress_bar(key_count, total_keys)

            # Extract values from the key
            src = key[0]
            dst = key[1]

            # Get ports that match the source-destination pair
            ports = scan_dict[key]

            # Get the number of unique destination ports
            num_unique_ports = len(set(ports))

            # Get the total number of ports
            num_total_ports = len(ports)

            # If there are more than the specified number of unique ports,
            # flag the pair as a likely scan
            if num_unique_ports > threshold:
                if graph and (num_unique_ports > graph_thresh):
                    ports = [int(i) for i in scan_dict[key]]
                    graph_scans(customer, src, dst, proto, ports, threshold,
                                potential_save_dir)
                write_data(src, dst, ports, num_unique_ports, num_total_ports,
                           proto, customer, result_type)
                likely_found += 1
            else:
                unlikely_found += 1

        # Report the number of potential scans found
        print(colors.bcolors.FAIL + '[!] Found ' + str(likely_found) +
              ' potential port scans [!]' + colors.bcolors.ENDC)
    else:
        print(colors.bcolors.WARNING +
              '[!] Querying elasticsearch failed - Verify your protocol choice or log configuration file! [!]' +
              colors.bcolors.ENDC)
    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped! [!]' +
              colors.bcolors.ENDC)
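# --------------------------------------------------------------------------
# Illustrative sketch of the scan heuristic above: a (src, dst) pair seen
# talking to more unique destination ports than the threshold is flagged as
# a likely scan. Addresses and ports are made-up values.
def _example_flag_scans(scan_dict, threshold):
    flagged = []
    for (src, dst), ports in scan_dict.items():
        unique_ports = len(set(ports))
        if unique_ports > threshold:
            flagged.append((src, dst, unique_ports, len(ports)))
    return flagged

# _example_flag_scans({('10.0.0.5', '10.0.0.9'): ['22', '23', '80', '443']}, 3)
# -> [('10.0.0.5', '10.0.0.9', 4, 4)]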