Esempio n. 1
0
    def read_from_csv(self, input_file, delimiter):
        """
        Import venues from a UTF-8 encoded CSV file (header required).

        The header must contain the columns "venue", "year" and "identifier".
        Every following line is turned into a Venue that is appended to
        self.venues; afterwards self.filename is set to the base name of the
        input file.
        :param input_file: Path to the CSV file.
        :param delimiter: Column delimiter in CSV file (typically ',').
        """

        # decode the file as UTF-8 while reading (see also http://stackoverflow.com/a/844443)
        with codecs.open(input_file, encoding='utf8') as csv_file:
            logger.info("Reading venues from " + input_file + "...")

            rows = csv.reader(csv_file, delimiter=delimiter)

            # the first line has to be the header
            column_names = next(rows, None)
            if not column_names:
                raise IllegalArgumentError("Missing header in CSV file.")

            # positions of the required columns, looked up in this order
            venue_col, year_col, identifier_col = (
                column_names.index(name)
                for name in ("venue", "year", "identifier"))

            for record in rows:
                # an empty record (blank line) is treated as a format error
                if not record:
                    raise IllegalArgumentError("Wrong CSV format.")
                venue = Venue(record[venue_col], record[year_col],
                              record[identifier_col])
                self.venues.append(venue)

        self.filename = os.path.basename(input_file)
        imported = len(self.venues)
        logger.info(str(imported) + " venues have been imported.")
Esempio n. 2
0
    def read_from_csv(self, input_file, exact_matches, replace_parentheses,
                      delimiter):
        """
        Import search queries from a UTF-8 encoded CSV file (header required).

        The header must contain the column "query". Every following line is
        turned into a Query that is appended to self.values; afterwards
        self.filename is set to the base name of the input file.
        :param input_file: Path to the CSV file.
        :param exact_matches: Only search for exact matches of query strings
        :param replace_parentheses: Replace Wikipedia parentheses in query strings
        :param delimiter: Column delimiter in CSV file (typically ',').
        """

        # decode the file as UTF-8 while reading (see also http://stackoverflow.com/a/844443)
        with codecs.open(input_file, encoding='utf8') as csv_file:
            logger.info("Reading search queries from " + input_file + "...")

            rows = csv.reader(csv_file, delimiter=delimiter)

            # the first line has to be the header
            column_names = next(rows, None)
            if not column_names:
                raise IllegalArgumentError("Missing header in CSV file.")

            query_col = column_names.index("query")

            for record in rows:
                # an empty record (blank line) is treated as a format error
                if not record:
                    raise IllegalArgumentError("Wrong CSV format.")
                search_query = Query(record[query_col], exact_matches,
                                     replace_parentheses)
                self.values.append(search_query)

        self.filename = os.path.basename(input_file)
        imported = len(self.values)
        logger.info(str(imported) + " search queries have been imported.")
Esempio n. 3
0
    def add(self, entities):
        """
        Add one or several entities to this list and update the predecessors.

        :param entities: An Entity, an EntityList, or a plain list of Entity objects.
        :raises IllegalArgumentError: If the argument has another type or if a list
            contains an element that is not an Entity. In that case this list is
            left unchanged.
        """
        error_message = "Argument must be object of class Entity or class EntityList."

        if isinstance(entities, Entity):
            self.entities.append(entities)
        elif isinstance(entities, EntityList):
            self.entities = self.entities + entities.entities
        elif isinstance(entities, list):
            # validate everything before mutating, so that an invalid element
            # cannot leave a partially extended list behind
            if not all(isinstance(element, Entity) for element in entities):
                raise IllegalArgumentError(error_message)
            # extend() also terminates if the argument is self.entities itself,
            # whereas appending while iterating that same list would not
            self.entities.extend(entities)
        else:
            raise IllegalArgumentError(error_message)
        self.set_predecessors()
Esempio n. 4
0
    def __init__(self, configuration, input_parameter_values, predecessor):
        """
        Create an entity from an entity configuration and the values for its
        input parameter(s).
        :param configuration: an object of class EntityConfiguration
        :param input_parameter_values: A dictionary with values for the input parameters defined in the configuration
            (may additionally contain values for the configured range variables).
        :param predecessor: predecessor in entity list
        """

        # the configuration this entity belongs to
        self.configuration = configuration
        # input parameters identify the entity (or are used for validation)
        self.input_parameters = OrderedDict.fromkeys(
            configuration.input_parameters)
        # output parameters are the ones to be retrieved using the API
        self.output_parameters = OrderedDict.fromkeys(
            configuration.output_parameter_mapping.keys())
        # destination path for raw download
        self.destination = None

        # every configured input parameter needs a value
        for name in configuration.input_parameters:
            if name not in input_parameter_values:
                raise IllegalArgumentError("Illegal input parameter: " + name)
            self.input_parameters[name] = input_parameter_values[name]

        # collect the values for the variables of the URI template, starting with the input parameters
        uri_variable_values = dict(self.input_parameters)

        # API keys are exposed as api_key_1, api_key_2, ...
        for number, api_key in enumerate(self.configuration.api_keys, start=1):
            uri_variable_values["api_key_" + str(number)] = api_key

        # range variables are optional, only those with a provided value are used
        for range_var_name in configuration.range_vars:
            if range_var_name in input_parameter_values:
                uri_variable_values[range_var_name] = input_parameter_values[
                    range_var_name]

        # build the URI of this entity from the template in the configuration
        uri_template = self.configuration.uri_template
        self.uri = uri_template.replace_variables(uri_variable_values)

        # set predecessor
        self.predecessor = predecessor
        # root entity is set if range variables are used
        self.root_entity = None

        # store JSON response data (may be needed by callbacks)
        self.json_response = None
Esempio n. 5
0
    def write_to_csv(self,
                     output_dir,
                     delimiter,
                     include_language,
                     filename=None):
        """
        Write the search results to a UTF-8 encoded CSV file in output_dir.

        Nothing is written if there are no search results. The output
        directory is created if it does not exist yet.
        :param output_dir: Target directory for generated CSV file.
        :param delimiter: Column delimiter in CSV file (typically ',').
        :param include_language: Add column "language" if tool was configured to detect languages of snippets.
        :param filename: Filename of file to export (replaces self.filename if provided).
        """

        if filename is not None:
            self.filename = filename

        if not self.values:
            logger.info("Nothing to export.")
            return

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        target_path = os.path.join(output_dir, self.filename)

        # encode the file as UTF-8 while writing (see also http://stackoverflow.com/a/844443)
        with codecs.open(target_path, 'w', encoding='utf8') as csv_file:
            logger.info('Exporting search results to ' + target_path + '...')
            writer = csv.writer(csv_file, delimiter=delimiter)

            # the header defines how many values every row must have
            column_names = SearchResult.get_column_names(include_language)
            expected_length = len(column_names)
            writer.writerow(column_names)

            exported = 0
            # the try block spans the whole loop: the first encoding error ends the export
            try:
                for record in self.get_rows(include_language):
                    if len(record) != expected_length:
                        missing = abs(expected_length - len(record))
                        raise IllegalArgumentError(
                            str(missing) +
                            ' parameter(s) is/are missing for "' +
                            str(record) + '"')
                    writer.writerow(record)
                    exported += 1

            except UnicodeEncodeError:
                logger.error('Encoding error while writing data for: ' +
                             str(record))

            logger.info(str(exported) + ' search results have been exported.')
Esempio n. 6
0
    def read_from_csv(self, input_file, delimiter):
        """
        Import search results from a UTF-8 encoded CSV file (header required).

        The header must contain the columns "query", "rank", "url", "title"
        and "snippet". Every following line is turned into a SearchResult that
        is appended to self.values; afterwards self.filename is set to the
        base name of the input file.
        :param input_file: Path to the CSV file.
        :param delimiter: Column delimiter in CSV file (typically ',').
        """

        # decode the file as UTF-8 while reading (see also http://stackoverflow.com/a/844443)
        with codecs.open(input_file, encoding='utf8') as csv_file:
            logger.info("Reading search results from " + input_file + "...")

            rows = csv.reader(csv_file, delimiter=delimiter)

            # the first line has to be the header
            column_names = next(rows, None)
            if not column_names:
                raise IllegalArgumentError("Missing header in CSV file.")

            # positions of the required columns, looked up in this order
            query_col, rank_col, url_col, title_col, snippet_col = (
                column_names.index(name)
                for name in ("query", "rank", "url", "title", "snippet"))

            for record in rows:
                # an empty record (blank line) is treated as a format error
                if not record:
                    raise IllegalArgumentError("Wrong CSV format.")
                search_result = SearchResult(record[query_col],
                                             record[rank_col],
                                             record[url_col],
                                             record[title_col],
                                             record[snippet_col])
                self.values.append(search_result)

        self.filename = os.path.basename(input_file)
        imported = len(self.values)
        logger.info(str(imported) + " search results have been imported.")
Esempio n. 7
0
    def write_to_csv(self, output_dir, delimiter):
        """
        Write the papers of all venues to a UTF-8 encoded CSV file in output_dir.

        Nothing is written if there are no venues. The output directory is
        created if it does not exist yet; the file is named self.filename.
        :param output_dir: Target directory for generated CSV file.
        :param delimiter: Column delimiter in CSV file (typically ',').
        """

        if not self.venues:
            logger.info("Nothing to export.")
            return

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        target_path = os.path.join(output_dir, self.filename)

        # encode the file as UTF-8 while writing (see also http://stackoverflow.com/a/844443)
        with codecs.open(target_path, 'w', encoding='utf8') as csv_file:
            logger.info('Exporting papers to ' + target_path + '...')
            writer = csv.writer(csv_file, delimiter=delimiter)

            # the header defines how many values every row must have
            column_names = Paper.get_column_names()
            expected_length = len(column_names)
            writer.writerow(column_names)

            exported = 0
            for venue in self.venues:
                # an encoding error only ends the export of the current venue
                try:
                    for record in venue.get_rows():
                        if len(record) != expected_length:
                            missing = expected_length - len(record)
                            raise IllegalArgumentError(
                                str(missing) +
                                " parameter(s) is/are missing for venue " +
                                venue.identifier)
                        writer.writerow(record)
                        exported += 1

                except UnicodeEncodeError:
                    logger.error(
                        "Encoding error while writing data for venue: " +
                        venue.identifier)

            logger.info(str(exported) + ' papers have been exported.')
Esempio n. 8
0
    def replace_variables(self, variable_values):
        """
        Replace the variables in the URI template with actual values.

        Each "{variable}" placeholder is replaced by the percent-encoded value
        found in variable_values. Values that are neither str nor bytes (e.g.
        numeric identifiers) are converted with str() before encoding.
        Variables without a value (missing, None or otherwise falsy) are left
        untouched, i.e. their placeholder stays in the returned URI.
        :param variable_values: A dictionary with values for the variables in the URI template.
        :return: The final URI string.
        """

        uri = self.uri_template_str

        for variable in self.get_variables():
            value = variable_values.get(variable, None)
            if not value:
                # NOTE(review): the previous code constructed an
                # IllegalArgumentError here without raising it, so missing
                # values have always been tolerated silently. Raising looks
                # like the original intent, but callers may rely on the
                # lenient behavior (e.g. variables whose values are only
                # supplied later) - confirm before enforcing it.
                continue
            # urllib.parse.quote only accepts str or bytes
            if not isinstance(value, (str, bytes)):
                value = str(value)
            uri = uri.replace("{" + variable + "}", urllib.parse.quote(value))

        return uri
Esempio n. 9
0
def main():
    """
    Command line entry point.

    Reads the configuration file given on the command line and inspects the
    header of the configured input CSV file. If the header has exactly one
    column, the file is treated as a list of queries: search results are
    retrieved and exported. Otherwise the file is treated as a list of search
    results and, if DetectLanguages is enabled, the languages of the snippets
    are detected and the results are exported again.
    :raises IllegalArgumentError: If the input CSV file has no header.
    """
    # parse command line arguments
    parser = get_argument_parser()
    args = parser.parse_args()

    # parse config file
    config = configparser.ConfigParser()
    config.read(args.config_file)

    # read configuration
    if 'DEFAULT' not in config:
        logger.error("DEFAULT configuration missing.\nTerminating.")
        sys.exit()

    defaults = config['DEFAULT']

    # i/o
    # The values must not be wrapped in str(): that would turn a missing
    # option (None) into the string "None" and defeat the check below.
    input_file = defaults.get('InputFile', None)
    output_dir = defaults.get('OutputDirectory', None)
    delimiter = defaults.get('Delimiter', None)

    # options that are missing or empty cannot be used
    if not input_file or not output_dir or not delimiter:
        logger.error("Required configuration missing.\nTerminating.")
        sys.exit()

    # requests
    exact_matches = defaults.getboolean('ExactMatches', True)
    replace_parentheses = defaults.getboolean('ReplaceParentheses', True)
    max_results = defaults.getint('MaxResults', 25)
    min_wait = defaults.getint('MinWait', 500)
    max_wait = defaults.getint('MaxWait', 2000)

    # detecting languages of snippets
    detect_languages = defaults.getboolean('DetectLanguages', True)

    # read CSV as UTF-8 encoded file (see also http://stackoverflow.com/a/844443)
    with codecs.open(input_file, encoding='utf8') as fp:
        logger.info("Checking input format in " + input_file + "...")
        reader = csv.reader(fp, delimiter=delimiter)
        # read header
        header = next(reader, None)
        if not header:
            raise IllegalArgumentError("Missing header in CSV file.")
        # a single column means that the file only contains queries
        queries_only = len(header) == 1

    if queries_only:
        logger.info(
            "Input file contains only queries, retrieving search results...")
        query_list = QueryList()
        query_list.read_from_csv(input_file, exact_matches,
                                 replace_parentheses, delimiter)
        query_list.retrieve_search_results(max_results, min_wait, max_wait,
                                           detect_languages)
        query_list.write_search_results_to_csv(output_dir, delimiter,
                                               detect_languages)
    elif detect_languages:
        logger.info(
            "Input file contains search results, detecting language of snippets..."
        )
        search_result_list = SearchResultList()
        search_result_list.read_from_csv(input_file, delimiter)
        search_result_list.detect_languages()
        search_result_list.write_to_csv(output_dir, delimiter,
                                        detect_languages)
    else:
        logger.info("No action configured, terminating...")
Esempio n. 10
0
    def get_chained_request_entities(self, chained_request_config):
        """
        Create the entities for a chained request from the parameters of this entity.

        The input parameters of the new entities are selected from the input and output parameters of this
        entity as listed in self.configuration.chained_request_input_parameters (keys "input_parameters" and
        "output_parameters"). If one selected output parameter uses the flatten operator ("<parameter>._") and
        holds a non-empty list, one entity is created per list element; otherwise a single entity is created.
        All entities are created without predecessor.
        :param chained_request_config: The configuration to use for the chained request.
        :return: The entities retrieved using the chained request.
        :raises IllegalArgumentError: If the name of the provided configuration differs from the chained
            request name in the configuration of this entity.
        :raises IllegalConfigurationError: If a selected parameter does not exist, the flatten operator is
            misused, or a KeyError occurs while building the entities.
        """

        # check if provided configuration has same name as defined for chained request in own configuration
        if self.configuration.chained_request_name == chained_request_config.name:
            # get input parameters for chained request from input and output parameters of this entity
            # (any KeyError raised inside this block is converted into an IllegalConfigurationError below)
            try:
                selected_input_parameters = self.configuration.chained_request_input_parameters[
                    "input_parameters"]
                selected_output_parameters = self.configuration.chained_request_input_parameters[
                    "output_parameters"]

                # simple input parameters for chained request selected from input and output parameters of this entity
                input_parameters_chained_request = {}
                # the operator "._" can be used to flatten a list output parameter for the chained request
                flatten_parameters_chained_request = {}

                # copy the selected input parameters of this entity
                for parameter in selected_input_parameters:
                    if parameter in self.input_parameters.keys():
                        input_parameters_chained_request[
                            parameter] = self.input_parameters[parameter]
                    else:
                        raise IllegalConfigurationError(
                            "Input parameter for chained request not found: " +
                            str(parameter))

                # copy the selected output parameters of this entity, lists to flatten are collected separately
                for parameter in selected_output_parameters:
                    if "._" in parameter:  # flatten operator
                        # get parameter that should be flattened
                        flatten_parameter_match = FLATTEN_OPERATOR_REGEX.match(
                            parameter)
                        if flatten_parameter_match:
                            flatten_parameter = flatten_parameter_match.group(
                                1)
                            # get parameter to flatten from output parameters of this entity
                            parameter_to_flatten_list = self.output_parameters[
                                flatten_parameter]
                            # falsy values (None, empty list) are skipped without an error
                            if parameter_to_flatten_list:
                                if isinstance(
                                        parameter_to_flatten_list,
                                        list):  # only lists can be flattened
                                    flatten_parameters_chained_request[
                                        flatten_parameter] = parameter_to_flatten_list
                                else:
                                    raise IllegalConfigurationError(
                                        "Parameter should be flattened, but is not a list: "
                                        + str(parameter))
                        else:
                            raise IllegalConfigurationError(
                                "Wrong usage of flatten operator: Expected: <parameter>._ "
                                "Actual: " + str(parameter))
                    else:  # simple parameter
                        if parameter in self.output_parameters.keys():
                            input_parameters_chained_request[
                                parameter] = self.output_parameters[parameter]
                        else:
                            raise IllegalConfigurationError(
                                "Input parameter for chained request not found: "
                                + str(parameter))

                chained_request_entities = list()

                if len(flatten_parameters_chained_request
                       ) > 0:  # flatten parameters defined
                    # we only support one flatten operator in the input parameter mapping for the chained request
                    if len(flatten_parameters_chained_request) > 1:
                        raise IllegalConfigurationError(
                            "Only one flatten operator supported, but " +
                            str(len(flatten_parameters_chained_request)) +
                            " provided.")

                    # at this point the dictionary contains exactly one entry
                    for flatten_parameter in flatten_parameters_chained_request.keys(
                    ):
                        parameter_to_flatten_list = flatten_parameters_chained_request[
                            flatten_parameter]
                        if len(parameter_to_flatten_list) > 0:
                            # the keys of the first list element define the inner parameters for all elements
                            inner_parameters = parameter_to_flatten_list[
                                0].keys()
                            # check if inner parameter name conflicts with existing input parameters for chained request
                            for inner_parameter in inner_parameters:
                                if inner_parameter in input_parameters_chained_request.keys(
                                ):
                                    raise IllegalConfigurationError(
                                        "Inner parameter " + inner_parameter +
                                        " of " + str(flatten_parameter) +
                                        " already exists in list of chained input parameters."
                                    )
                            # extract inner parameters and combine them with outer parameters to flatten the list
                            for list_element in parameter_to_flatten_list:
                                # every entity gets its own copy of the outer parameters
                                flattened_input_parameters_chained_request = {
                                    **input_parameters_chained_request
                                }
                                for inner_parameter in inner_parameters:
                                    flattened_input_parameters_chained_request[inner_parameter] = \
                                        list_element[inner_parameter]
                                chained_request_entities.append(
                                    Entity(
                                        chained_request_config,
                                        flattened_input_parameters_chained_request,
                                        None))

                else:  # no flatten parameters defined
                    chained_request_entities.append(
                        Entity(chained_request_config,
                               input_parameters_chained_request, None))

            except KeyError as e:
                raise IllegalConfigurationError(
                    "Reading chained request from configuration failed: Parameter "
                    + str(e) + " not found.")
        else:
            raise IllegalArgumentError(
                "Configuration <" + str(chained_request_config.name) +
                "> provided, but <" +
                str(self.configuration.chained_request_name) +
                "> needed for chained request.")

        return chained_request_entities
Esempio n. 11
0
    def apply_filter(json_response, parameter_filter):
        """
        Follow an access path (e.g., ["user", "first_name"]) through a nested JSON structure.

        Each path element is used as a dictionary key, or as a list index if the current value is a list
        and the element can be parsed as an integer. The list matching operator "*" ends the path: as last
        element it returns the complete list, as second-last element it must be followed by a dictionary
        mapping parameter names to filter paths that are applied to every list element.
        :param json_response: The JSON response to filter.
        :param parameter_filter: A list with keys for filtering a nested dictionary
            or with the list matching operator "*" followed by an optional parameter mapping for the list elements.
        :return: The extracted value if the filter has successfully been applied
            (can be a simple value, dict, or list), the string "None" if a dictionary value on the path is None,
            and None if a key or index on the path does not exist.
        """

        last_position = len(parameter_filter) - 1
        # the walk starts at the root of the response
        current = json_response

        for position, step in enumerate(parameter_filter):
            if step == "*":  # list matching operator, always ends the walk
                if not isinstance(current, list):
                    raise IllegalArgumentError(
                        "List matching operator reached, but current position in response is not a list.")

                # operator is the last element: return a copy of the complete list
                if position == last_position:
                    return list(current)

                if position != last_position - 1:
                    raise IllegalArgumentError(
                        "The list matching operator must be the last or second-last element of  the filter path.")

                # operator is the second-last element: the last one maps parameter names to filter paths
                element_mapping = parameter_filter[position + 1]
                if not isinstance(element_mapping, dict):
                    raise IllegalArgumentError(
                        "The list matching operator must be succeeded by a filter object.")

                matches = []
                for item in current:
                    projected = OrderedDict()
                    for name, element_filter in element_mapping.items():
                        projected[name] = Entity.apply_filter(
                            item, element_filter)
                    matches.append(projected)
                return matches

            # normal filter path: only simple values are allowed as path elements
            if isinstance(step, (list, dict)):
                raise IllegalArgumentError(
                    "A filter path must only contain filter strings or the list matching operator "
                    "(optionally followed by a filter object).")

            try:
                if isinstance(current, list) and Entity.parsable_as_int(step):
                    # the path element is an index into the current list
                    current = current[int(step)]
                    continue
                # the path element is a key of the current dictionary
                selected = current[step]
            except (KeyError, IndexError):
                logger.error("Could not apply filter <" + str(step) +
                             "> to response " + str(current) + ".")
                return None

            if selected is None:
                logger.info("Result for filter " + step + " was None.")
                return "None"
            current = selected

        return current
Esempio n. 12
0
    def read_from_csv(self, input_file, delimiter):
        """
        Read entity input parameter values from a CSV file (header required).

        Input parameters that are configured as a list [parameter, uri, response_filter] are URI input
        parameters: their value is not read from the CSV file, but retrieved once from the URI and extracted
        from the JSON response using the response filter. In the configuration, such a list is replaced by
        the parameter name.
        Only the rows with index start_index to start_index+chunk_size-1 are imported (if chunk_size is 0,
        all rows from start_index on are imported).
        :param input_file: Path to the CSV file.
        :param delimiter: Column delimiter in CSV file (typically ',').
        :raises IllegalConfigurationError: If a URI input parameter is malformed or its data cannot be
            retrieved successfully.
        :raises IllegalArgumentError: If the header is missing or does not match the input parameters,
            if a row is empty, or if a parameter has no value.
        """

        # read CSV as UTF-8 encoded file (see also http://stackoverflow.com/a/844443)
        with codecs.open(input_file, encoding='utf8') as fp:
            if self.chunk_size == 0:
                interval = "[" + str(self.start_index) + ", max]"
            else:
                interval = "[" + str(self.start_index) + ", " + str(self.start_index+self.chunk_size-1) + "]"
            logger.info("Reading entities in " + interval + " from " + input_file + "...")

            reader = csv.reader(fp, delimiter=delimiter)

            # check if one of the input parameters is an URI
            uri_input_parameters = OrderedDict()
            # iterate over a copy, because the configured list is modified inside the loop
            # (modifying the list being iterated would skip the element following a URI parameter)
            for parameter in list(self.configuration.input_parameters):
                if not isinstance(parameter, list):
                    continue

                # a URI input parameter needs exactly three elements, the second one being an HTTP(S) URI
                well_formed = (len(parameter) == 3
                               and isinstance(parameter[1], str)
                               and parameter[1].startswith("http"))
                if not well_formed:
                    raise IllegalConfigurationError("Malformed URI input parameter, should be "
                                                    "[parameter, uri, response_filter].")

                uri_parameter, uri, response_filter = parameter
                logger.info("Found URI input parameter: " + str(uri_parameter))

                logger.info("Retrieving data for URI input parameter " + str(uri_parameter) + "...")

                try:
                    # retrieve data
                    response = self.session.get(uri)

                    if response.ok:
                        logger.info("Successfully retrieved data for URI input parameter " + str(uri_parameter) + ".")

                        # deserialize JSON string
                        json_response = json.loads(response.text)

                        filter_result = Entity.apply_filter(json_response, response_filter)
                        uri_input_parameters[uri_parameter] = filter_result

                    else:
                        raise IllegalConfigurationError("Error " + str(response.status_code)
                                                        + ": Could not retrieve data for URI input parameter "
                                                        + str(uri_parameter) + ". Response: "
                                                        + str(response.content))

                except (gaierror,
                        ConnectionError,
                        MaxRetryError,
                        NewConnectionError):
                    # connection problems are only logged, the parameter then has no value
                    logger.error("An error occurred while retrieving data for URI input parameter "
                                 + str(uri_parameter) + ".")

                # replace URI parameter with URI parameter name
                self.configuration.input_parameters.remove(parameter)
                self.configuration.input_parameters.append(uri_parameter)

            # dictionary to store CSV column indices for input parameters
            input_parameter_indices = OrderedDict.fromkeys(self.configuration.input_parameters)

            # read header
            header = next(reader, None)
            if not header:
                raise IllegalArgumentError("Missing header in CSV file.")

            # number of columns must equal number of input parameters minus number of uri input parameters
            if len(header) != len(input_parameter_indices) - len(uri_input_parameters):
                raise IllegalArgumentError("Wrong number of columns in CSV file.")

            # check if columns and parameters match, store indices
            for index, column_name in enumerate(header):
                if column_name not in input_parameter_indices:
                    raise IllegalArgumentError("Unknown column name in CSV file: " + column_name)
                input_parameter_indices[column_name] = index

            # read CSV file
            predecessor = None
            for current_index, row in enumerate(reader):
                # only read value from start_index to start_index+chunk_size-1 (if chunk_size is 0, read until the end)
                if current_index < self.start_index:
                    continue
                if self.chunk_size != 0 and current_index >= self.start_index + self.chunk_size:
                    break

                if not row:
                    raise IllegalArgumentError("Wrong CSV format.")

                # dictionary to store imported parameter values
                input_parameter_values = OrderedDict.fromkeys(self.configuration.input_parameters)

                # read parameters
                for parameter in input_parameter_values:
                    if parameter in uri_input_parameters:
                        # parameter was URI input parameter, use the retrieved value
                        value = uri_input_parameters[parameter]
                    else:
                        # get value from CSV and unescape escaped double quotes
                        value = row[input_parameter_indices[parameter]]
                        value = str(value).replace("\"\"", "\"")
                    if not value:
                        raise IllegalArgumentError("No value for parameter " + parameter)
                    input_parameter_values[parameter] = value

                # create entity from values in row
                # (it becomes the predecessor of the next entity, even if it is dropped as duplicate below)
                new_entity = Entity(self.configuration, input_parameter_values, predecessor)
                predecessor = new_entity

                # if ignore_input_duplicates is configured, only add entities that do not exist yet
                if self.configuration.ignore_input_duplicates \
                        and any(entity.equals(new_entity) for entity in self.entities):
                    continue

                # add new entity to list
                self.entities.append(new_entity)

        logger.info(str(len(self.entities)) + " entities have been imported.")
Esempio n. 13
0
    def write_to_csv(self, output_dir, delimiter):
        """
        Export entities together with retrieved data to a CSV file.

        The file is named after the configuration; if a chunk size is configured, the exported index range is
        appended to the file name. Columns are the input parameters followed by all output parameters that are
        not also input parameters. Parameters that are both input and output parameters ("validation parameters")
        are compared and the result is logged; the retrieved (output) value is exported when present.
        Rows that cause a UnicodeEncodeError are logged and skipped.

        :param output_dir: Target directory for generated CSV file (created if it does not exist).
        :param delimiter: Column delimiter in CSV file (typically ',').
        :raises IllegalArgumentError: If the number of values collected for an entity differs from the number
            of columns (see comment at the check below).
        """

        if not self.entities:
            logger.info("Nothing to export.")
            return

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if self.chunk_size != 0:
            # file name contains index of first and last exported entity of this chunk
            filename = '{0}_{1}-{2}.csv'.format(self.configuration.name, str(self.start_index),
                                                str(self.start_index + min(len(self.entities), self.chunk_size) - 1))
        else:
            filename = '{0}.csv'.format(self.configuration.name)

        file_path = os.path.join(output_dir, filename)

        # write entity list to UTF8-encoded CSV file (see also http://stackoverflow.com/a/844443)
        with codecs.open(file_path, 'w', encoding='utf8') as fp:
            logger.info('Exporting entities to ' + file_path + '...')
            writer = csv.writer(fp, delimiter=delimiter)

            # configured output parameters (built once, reused for all set operations below)
            configured_output_parameters = OrderedSet(self.configuration.output_parameter_mapping.keys())

            # check if input and output parameters overlap -> validate these parameters later
            validation_parameters = OrderedSet(self.configuration.input_parameters).intersection(
                configured_output_parameters
            )

            # get column names for CSV file (start with input parameters)
            column_names = self.configuration.input_parameters + [
                parameter for parameter in self.configuration.output_parameter_mapping.keys()
                if parameter not in validation_parameters
            ]

            # check if an output parameter has been added and/or removed by a callback function and update column names
            parameters_removed = OrderedSet()
            parameters_added = OrderedSet()
            for entity in self.entities:
                entity_output_parameters = OrderedSet(entity.output_parameters.keys())
                parameters_removed.update(configured_output_parameters.difference(entity_output_parameters))
                parameters_added.update(entity_output_parameters.difference(configured_output_parameters))
            for parameter in parameters_removed:
                # a validation parameter is only present in column_names as an input parameter; this column has to
                # be kept even if the corresponding output parameter has been removed
                if parameter not in validation_parameters:
                    column_names.remove(parameter)
            for parameter in parameters_added:
                column_names.append(parameter)

            # write header of CSV file
            writer.writerow(column_names)

            # number of rows actually written (rows with encoding errors are skipped)
            exported_count = 0

            for entity in self.entities:
                try:
                    # all columns are initialized with None, missing values are exported as empty fields
                    row = OrderedDict.fromkeys(column_names)

                    # check validation parameters
                    for parameter in validation_parameters:
                        # the output parameter may have been removed by a callback function -> treat as empty value
                        if parameter in entity.output_parameters.keys():
                            retrieved_value = entity.output_parameters[parameter]
                        else:
                            retrieved_value = None

                        if retrieved_value:
                            if str(entity.input_parameters[parameter]) == str(retrieved_value):
                                logger.info("Validation of parameter " + parameter + " successful for entity "
                                            + str(entity) + ".")
                            else:
                                logger.error("Validation of parameter " + parameter + " failed for entity "
                                             + str(entity)
                                             + ": Expected: " + str(entity.input_parameters[parameter])
                                             + ", Actual: " + str(retrieved_value)
                                             + ". Retrieved value will be exported.")
                        else:
                            logger.error("Validation of parameter " + parameter + " failed for entity " + str(entity)
                                         + ": Empty value.")

                    # write data (retrieved output values take precedence over input values)
                    for column_name in column_names:
                        if column_name in entity.output_parameters.keys():
                            row[column_name] = entity.output_parameters[column_name]
                        elif column_name in entity.input_parameters.keys():
                            row[column_name] = entity.input_parameters[column_name]

                    # row is keyed by column name, so it can only be shorter than column_names if a column name
                    # occurs more than once (e.g., an added output parameter named like an input parameter)
                    if len(row) == len(column_names):
                        writer.writerow(list(row.values()))
                        exported_count += 1
                    else:
                        raise IllegalArgumentError(str(len(column_names) - len(row)) + " parameter(s) is/are missing "
                                                                                       "for entity " + str(entity))

                except UnicodeEncodeError:
                    logger.error("Encoding error while writing data for entity: " + str(entity))

            logger.info(str(exported_count) + ' entities have been exported.')