def test_flow_instantiation_with_line_v2():
    """
    Verifies that a Flow built from a CSV line carries the expected data
    types: a string source address and float numeric fields.
    """
    assert len(MOCKED_CSV_LINE_2) == EXPECTED_KEYS_LEN
    flow = Flow.from_line(MOCKED_CSV_LINE_2)
    assert isinstance(flow.src_address, str)
    numeric_fields = (
        flow.events,
        flow.avg_events,
        flow.duration,
        flow.avg_duration,
        flow.bytes,
        flow.avg_bytes,
        flow.packets,
        flow.avg_packets,
        flow.first_event,
        flow.last_event,
        flow.aged_score,
    )
    for value in numeric_fields:
        assert isinstance(value, float)
# Ejemplo n.º 2 ("Example no. 2" — snippet-separator left by the code scraper)
# 0 (vote count from the scraped page; commented out so the file parses)
    def open_sort_abs_file(self, absolute_data_path) -> tuple:
        """
        Reads the main data file and collects each row as an IP flow
        alongside its source IP (first CSV column).

        :param absolute_data_path: filepath where all IPs flows are stored

        :return: tuple(list, list) ip_flows, ips
        """
        flows = []
        addresses = []
        with open(absolute_data_path, "r", encoding="utf-8") as csv_file:
            reader = csv.reader(csv_file)
            for row in reader:
                # blank rows carry no flow data
                if not row:
                    continue
                flows.append(Flow.from_line(row))
                addresses.append(row[0])

        return flows, addresses
def test_flow_instantiation_with_dict_v2():
    """
    Verifies that a Flow instantiates successfully from a dict and has
    correct data types when all required keys are present with float
    values.
    """
    assert len(MOCKED_VALID_DICT_2.keys()) == EXPECTED_KEYS_LEN
    flow = Flow.from_dict(MOCKED_VALID_DICT_2)
    assert isinstance(flow.src_address, str)
    numeric_fields = (
        flow.events,
        flow.avg_events,
        flow.duration,
        flow.avg_duration,
        flow.bytes,
        flow.avg_bytes,
        flow.packets,
        flow.avg_packets,
        flow.first_event,
        flow.last_event,
        flow.aged_score,
    )
    for value in numeric_fields:
        assert isinstance(value, float)
# Ejemplo n.º 4 ("Example no. 4" — snippet-separator left by the code scraper)
# 0 (vote count from the scraped page; commented out so the file parses)
    def open_sort_new_file(self, raw_data_dir_path, new_files) -> tuple:
        """
        Provides a tuple(list, list) with new IPs flow and IPs from input
        raw data dir

        :param raw_data_dir_path: dir path of input data
        :param new_files: list of unprocessed new data files

        :return: tuple(list, list) new_ip_flows, new_ips
        """
        new_ip_flows = []
        new_ips = []
        for file in new_files:
            # encoding made explicit for consistency with open_sort_abs_file;
            # previously this relied on the platform default encoding
            with open(f"{raw_data_dir_path}/{file}", "r",
                      encoding="utf-8") as csv_file:
                for line in csv.reader(csv_file):
                    # skip blank rows (previously raised IndexError on
                    # line[0]) as well as the 'SrcAddr' header row
                    if line and line[0] != 'SrcAddr':
                        new_ip_flows.append(Flow.from_line(line))
                        new_ips.append(line[0])

        return new_ip_flows, new_ips
# Ejemplo n.º 5 ("Example no. 5" — snippet-separator left by the code scraper)
# 0 (vote count from the scraped page; commented out so the file parses)
    def get_updated_flows(self, absolute_data_path) -> list:
        """
        Returns a list of the updated IP flows read from the absolute data
        file; an unreadable file yields an empty list.

        :param absolute_data_path: filepath where all IPs flows are stored

        :return: list of all IP flows
        """
        ip_flows = []
        try:
            with open(absolute_data_path, "r", encoding="utf-8") as csv_file:
                for row in csv.reader(csv_file):
                    # ignore blank rows
                    if row:
                        ip_flows.append(Flow.from_line(row))
        except IOError as e:
            logger.exception(f"Unable to open {absolute_data_path} file: {e}")
            return []

        logger.debug(f"Updated flows successfully: {ip_flows}")

        return ip_flows
def test_failed_flow_instantiation_from_dict_v2():
    """
    Verifies that Flow fails instantiation with TypeError
    """
    with pytest.raises(TypeError):
        Flow.from_dict(MOCKED_INVALID_DICT_2)
def test_failed_flow_instantiation_from_line_v1():
    """
    Verifies that Flow fails instantiation with ValueError when given an
    invalid CSV line
    """
    with pytest.raises(ValueError) as e:
        return Flow.from_line(MOCKED_INVALID_CSV_LINE_1)
def test_failed_flow_instantiation_from_line_missing_arg():
    """
    Verifies that Flow fails instantiation if missing line.

    Renamed from test_failed_flow_instantiation_from_line_v1: that name
    duplicated the previous test's, so the second definition shadowed the
    first and pytest collected only one of the two.
    """
    with pytest.raises(Exception):
        Flow.from_line()
def test_failed_flow_instantiation():
    """
    Verifies that Flow fails instantiation if missing dict
    """
    with pytest.raises(Exception):
        Flow.from_dict()
# Ejemplo n.º 10 ("Example no. 10" — snippet-separator left by the code scraper)
# 0 (vote count from the scraped page; commented out so the file parses)
    def update_records_files(self, absolute_data_path, new_known_ip_flows,
                             unknown_ip_flows, current_time) -> None:
        """
        Updates record files for IP flows and filters out those which are
        safelisted (false positives).

        :param absolute_data_path: filepath where all IPs flows are stored
        :param new_known_ip_flows: list of new known IP flows
        :param unknown_ip_flows: list of unknown IP flows
        :param current_time: timestamp comparable with ``Flow.last_event``
            (it is divided by ``Defaults.MINUTES_A_DAY`` below, so
            presumably expressed in minutes -- TODO confirm)
        :raises IOError: when the FP log or the absolute data file cannot
            be written
        """
        known_ip_flows, _ips_in_abs_file = self.open_sort_abs_file(
            absolute_data_path)
        # Absolute set = previously known flows plus the flows never seen
        # before this run.
        new_absolute_flows = list(known_ip_flows)
        new_absolute_flows.extend(unknown_ip_flows)

        # Merge freshly observed stats for already-known sources into their
        # aggregated entry.
        if new_known_ip_flows:
            for new_flow in new_known_ip_flows:
                for idx, absolute_flow in enumerate(new_absolute_flows):
                    if absolute_flow.src_address != new_flow.src_address:
                        continue
                    days_since_first_seen = (
                        current_time - absolute_flow.last_event
                    ) // Defaults.MINUTES_A_DAY.value
                    dh_events = (
                        absolute_flow.avg_events *
                        (days_since_first_seen - 1)) + new_flow.events

                    # Guard against division by zero on same-day updates.
                    if days_since_first_seen != 0:
                        updated_events_average = dh_events / days_since_first_seen
                    else:
                        updated_events_average = dh_events

                    updated_flow = Flow()
                    updated_flow.src_address = new_flow.src_address
                    updated_flow.events = absolute_flow.events + new_flow.events
                    updated_flow.duration = absolute_flow.duration + new_flow.duration
                    updated_flow.avg_duration = (
                        absolute_flow.avg_duration +
                        new_flow.avg_duration) / 2.0
                    updated_flow.bytes = absolute_flow.bytes + new_flow.bytes
                    updated_flow.avg_bytes = (absolute_flow.avg_bytes +
                                              new_flow.avg_bytes) / 2.0
                    updated_flow.packets = absolute_flow.packets + new_flow.packets
                    updated_flow.avg_packets = (absolute_flow.avg_packets +
                                                new_flow.avg_packets) / 2.0
                    updated_flow.first_event = absolute_flow.first_event
                    updated_flow.last_event = new_flow.last_event
                    updated_flow.avg_events = updated_events_average

                    new_absolute_flows[idx] = updated_flow

        current_directory = os.getcwd()
        safelist = Safelist()
        asn_db_path = f"{current_directory}{DirPaths.ASN.value}{FilePaths.ASN_DB.value}"
        asn_info = safelist.get_asn_data(asn_db_path, new_absolute_flows)
        # FP = false positive: a flow matching a safelist entry.
        false_positives = []
        remaining_flows = []

        # BUG FIX: the previous implementation deleted entries from
        # new_absolute_flows while iterating it with enumerate(), which
        # silently skipped the element following every deletion. Build a
        # filtered list instead.
        for flow in new_absolute_flows:
            in_safelisted_nets = safelist.check_if_ip_in_safelisted_nets(
                flow.src_address)
            in_safelisted_ips = safelist.check_if_ip_in_safelisted_ips(
                flow.src_address)
            in_safelisted_orgs, org_name = safelist.check_if_org_in_safelisted_orgs(
                asn_info[flow.src_address])

            if in_safelisted_nets:
                false_positives.append(flow)
                logger.info(
                    f"Found {flow.src_address} in safelisted Nets. Deleting entry...\n"
                )
            elif in_safelisted_ips:
                false_positives.append(flow)
                logger.info(
                    f"Found {flow.src_address} in safelisted IPs. Deleting entry...\n"
                )
            elif in_safelisted_orgs:
                false_positives.append(flow)
                logger.info(
                    f"Found {flow.src_address} ASN matches organization {org_name} Deleting entry...\n"
                )
            else:
                remaining_flows.append(flow)
        new_absolute_flows = remaining_flows

        try:
            with open(self.fp_log_filepath, "a", encoding="utf-8") as FP_file:
                csv_writer = csv.writer(FP_file)
                csv_writer.writerows(false_positives)
        except IOError as e:
            logger.exception(
                f"Unable to append data to {self.fp_log_filepath} file: {e}\n")
            raise e

        try:
            with open(absolute_data_path, "w", encoding="utf-8") as csv_file:
                csv_writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
                for new_flow in new_absolute_flows:
                    csv_writer.writerow(new_flow)
        except IOError as e:
            logger.exception(
                f"Unable to save {absolute_data_path} file: {e}\n")
            raise e

        logger.debug("Updated record files for IP flows successfully\n")