def test_flow_instantiation_with_line_v2():
    """Verifies that a Flow built via from_line has the correct data types."""
    assert len(MOCKED_CSV_LINE_2) == EXPECTED_KEYS_LEN
    flow = Flow.from_line(MOCKED_CSV_LINE_2)
    assert isinstance(flow.src_address, str)
    # Every numeric attribute parsed from the CSV line must be a float.
    float_fields = (
        "events", "avg_events", "duration", "avg_duration",
        "bytes", "avg_bytes", "packets", "avg_packets",
        "first_event", "last_event", "aged_score",
    )
    for field in float_fields:
        assert isinstance(getattr(flow, field), float)
def open_sort_abs_file(self, absolute_data_path) -> tuple:
    """
    Provides a tuple(list, list) with IPs flow and IPs from the main data file

    :param absolute_data_path: filepath where all IPs flows are stored
    :return: tuple(list, list) ip_flows, ips
    """
    # Read all non-empty CSV rows first, then derive both result lists.
    with open(absolute_data_path, "r", encoding="utf-8") as csv_file:
        rows = [row for row in csv.reader(csv_file) if row]
    flows = [Flow.from_line(row) for row in rows]
    # First column of each row is the source IP address.
    addresses = [row[0] for row in rows]
    return flows, addresses
def test_flow_instantiation_with_dict_v2():
    """
    Verifies that a Flow instantiates successfully from a dict and has the
    correct data types when all required keys are present with float values.
    """
    assert len(MOCKED_VALID_DICT_2.keys()) == EXPECTED_KEYS_LEN
    flow = Flow.from_dict(MOCKED_VALID_DICT_2)
    assert isinstance(flow.src_address, str)
    # Every numeric attribute coming from the dict must be a float.
    float_fields = (
        "events", "avg_events", "duration", "avg_duration",
        "bytes", "avg_bytes", "packets", "avg_packets",
        "first_event", "last_event", "aged_score",
    )
    for field in float_fields:
        assert isinstance(getattr(flow, field), float)
def open_sort_new_file(self, raw_data_dir_path, new_files) -> tuple:
    """
    Provides a tuple(list, list) with new IPs flow and IPs from input raw data dir

    :param raw_data_dir_path: dir path of input data
    :param new_files: list of unprocessed new data files
    :return: tuple(list, list) new_ip_flows, new_ips
    """
    new_ip_flows = []
    new_ips = []
    for file in new_files:
        # encoding added for consistency with the other readers in this class
        with open(f"{raw_data_dir_path}/{file}", "r", encoding="utf-8") as csv_file:
            for line in csv.reader(csv_file):
                # BUG FIX: guard against blank rows — the original indexed
                # line[0] unconditionally and raised IndexError on empty
                # lines. Also skip the 'SrcAddr' header row as before.
                if line and line[0] != 'SrcAddr':
                    new_ip_flows.append(Flow.from_line(line))
                    new_ips.append(line[0])
    return new_ip_flows, new_ips
def get_updated_flows(self, absolute_data_path) -> list:
    """
    Returns a list of the updated IP flows from absolute data file

    :param absolute_data_path: filepath where all IPs flows are stored
    :return: list of all IP flows
    """
    ip_flows = []
    try:
        with open(absolute_data_path, "r", encoding="utf-8") as csv_file:
            for line in csv.reader(csv_file):
                # Skip blank rows produced by trailing newlines.
                if line:
                    ip_flows.append(Flow.from_line(line))
    except IOError as e:
        # Best-effort: an unreadable file yields an empty result.
        logger.exception(f"Unable to open {absolute_data_path} file: {e}")
        return []
    logger.debug(f"Updated flows successfully: {ip_flows}")
    return ip_flows
def test_failed_flow_instantiation_from_dict_v2():
    """Verifies that Flow fails instantiation with TypeError."""
    with pytest.raises(TypeError):
        Flow.from_dict(MOCKED_INVALID_DICT_2)
def test_failed_flow_instantiation_from_line_v1():
    """Verifies that Flow.from_line raises ValueError for an invalid CSV line."""
    with pytest.raises(ValueError):
        Flow.from_line(MOCKED_INVALID_CSV_LINE_1)
def test_failed_flow_instantiation_from_line_missing_arg():
    """Verifies that Flow.from_line raises when called without a line argument.

    NOTE(review): this test originally shared the name
    test_failed_flow_instantiation_from_line_v1 with the previous test, so
    the later definition shadowed the earlier one and pytest collected only
    one of the two. Renamed to restore coverage of both cases.
    """
    with pytest.raises(Exception):
        Flow.from_line()
def test_failed_flow_instantiation():
    """Verifies that Flow fails instantiation if the dict argument is missing."""
    with pytest.raises(Exception):
        Flow.from_dict()
def update_records_files(self, absolute_data_path, new_known_ip_flows,
                         unknown_ip_flows, current_time) -> None:
    """
    Updates record files for IP flows and filters those which are safelisted.

    :param absolute_data_path: filepath where all IPs flows are stored
    :param new_known_ip_flows: list of new known IP flows
    :param unknown_ip_flows: list of unknown IP flows
    :param current_time: time reference used for event-age arithmetic
        (assumed to share units with Flow.last_event — TODO confirm)
    :raises IOError: when the FP log or the absolute data file cannot be written
    """
    known_ip_flows, _ = self.open_sort_abs_file(absolute_data_path)
    new_absolute_flows = []
    new_absolute_flows.extend(known_ip_flows)
    new_absolute_flows.extend(unknown_ip_flows)

    # Merge each newly-seen flow for an already-known source address into
    # the matching absolute record.
    if new_known_ip_flows:
        for new_flow in new_known_ip_flows:
            for idx, absolute_flow in enumerate(new_absolute_flows):
                if absolute_flow.src_address != new_flow.src_address:
                    continue
                days_since_first_seen = (
                    current_time - absolute_flow.last_event
                ) // Defaults.MINUTES_A_DAY.value
                dh_events = (absolute_flow.avg_events *
                             (days_since_first_seen - 1)) + new_flow.events
                # Avoid division by zero when both events fall on the same day.
                if days_since_first_seen != 0:
                    updated_events_average = dh_events / days_since_first_seen
                else:
                    updated_events_average = dh_events

                updated_flow = Flow()
                updated_flow.src_address = new_flow.src_address
                updated_flow.events = absolute_flow.events + new_flow.events
                updated_flow.duration = absolute_flow.duration + new_flow.duration
                updated_flow.avg_duration = (
                    absolute_flow.avg_duration + new_flow.avg_duration) / 2.0
                updated_flow.bytes = absolute_flow.bytes + new_flow.bytes
                updated_flow.avg_bytes = (
                    absolute_flow.avg_bytes + new_flow.avg_bytes) / 2.0
                updated_flow.packets = absolute_flow.packets + new_flow.packets
                updated_flow.avg_packets = (
                    absolute_flow.avg_packets + new_flow.avg_packets) / 2.0
                updated_flow.first_event = absolute_flow.first_event
                updated_flow.last_event = new_flow.last_event
                updated_flow.avg_events = updated_events_average
                new_absolute_flows[idx] = updated_flow

    safelist = Safelist()
    asn_db_path = f"{os.getcwd()}{DirPaths.ASN.value}{FilePaths.ASN_DB.value}"
    asn_info = safelist.get_asn_data(asn_db_path, new_absolute_flows)

    # FPs = false positives: safelisted flows that must not be reported.
    # BUG FIX: the original deleted entries from new_absolute_flows while
    # enumerating it, which silently skips the element that follows every
    # deletion. Build a filtered "kept" list instead.
    list_of_FPs = []
    kept_flows = []
    for flow in new_absolute_flows:
        in_safelisted_nets = safelist.check_if_ip_in_safelisted_nets(
            flow.src_address)
        in_safelisted_ips = safelist.check_if_ip_in_safelisted_ips(
            flow.src_address)
        in_safelisted_orgs, org_name = safelist.check_if_org_in_safelisted_orgs(
            asn_info[flow.src_address])
        if in_safelisted_nets:
            list_of_FPs.append(flow)
            logger.info(
                f"Found {flow.src_address} in safelisted Nets. Deleting entry...\n"
            )
        elif in_safelisted_ips:
            list_of_FPs.append(flow)
            logger.info(
                f"Found {flow.src_address} in safelisted IPs. Deleting entry...\n"
            )
        elif in_safelisted_orgs:
            list_of_FPs.append(flow)
            logger.info(
                f"Found {flow.src_address} ASN matches organization {org_name} Deleting entry...\n"
            )
        else:
            kept_flows.append(flow)
    new_absolute_flows = kept_flows

    # newline="" per the csv module docs, so the writer controls line endings.
    try:
        with open(self.fp_log_filepath, "a", encoding="utf-8",
                  newline="") as FP_file:
            csv.writer(FP_file).writerows(list_of_FPs)
    except IOError as e:
        logger.exception(
            f"Unable to append data to {self.fp_log_filepath} file: {e}\n")
        raise e

    try:
        with open(absolute_data_path, "w", encoding="utf-8",
                  newline="") as csv_file:
            csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
            for new_flow in new_absolute_flows:
                csv_writer.writerow(new_flow)
    except IOError as e:
        logger.exception(
            f"Unable to save {absolute_data_path} file: {e}\n")
        raise e
    logger.debug("Updated record files for IP flows successfully\n")