Beispiel #1
0
def normalize_headers(header_row, long_headers, long_to_short_dict):
    """ Produce the normalized name for every header in a file's header row

        Each header is passed through FieldCleaner.clean_name, swapped for its DB column name when it is one of the
        alternate spellings the broker accepts, and finally translated to its short form when long headers are in use.

        Args:
            header_row: an array of the file headers given
            long_headers: boolean indicating if we're using the long versions of the headers (True for long)
            long_to_short_dict: a dictionary containing a mapping from long headers to short ones for this file type

        Yields:
            A string containing the cleaned header name (the cleaned short version when long_headers is set and the
            header is a key of long_to_short_dict).
    """
    # Headers that don't match the DB but are allowed by the broker, keyed to their DB matches
    db_names = {
        'deobligationsrecoveriesrefundsofprioryearbyprogramobjectclass_cpe':
            'deobligationsrecoveriesrefundsdofprioryearbyprogramobjectclass_cpe',
        'facevalueloanguarantee': 'facevalueofdirectloanorloanguarantee',
        'budgetauthorityavailableamounttotal_cpe': 'totalbudgetaryresources_cpe',
        'correctionlatedeleteindicator': 'correctiondeleteindicator',
        'place_of_performance_zip4': 'place_of_performance_zip4a',
    }

    for raw_header in header_row:
        cleaned = FieldCleaner.clean_name(raw_header)
        # Fall back to the cleaned header itself when it has no alternate DB spelling
        cleaned = db_names.get(cleaned, cleaned)

        # Only translate to the short header when one is mapped; anything else passes through as cleaned
        if long_headers and cleaned in long_to_short_dict:
            cleaned = FieldCleaner.clean_name(long_to_short_dict[cleaned])
        yield cleaned
    def count_and_set_headers(self, csv_schema, header_row):
        """ Tally how often each schema field shows up in the header row while rebuilding self.expected_headers and
            self.flex_headers (both end up with one entry per header, in header order)

            Args:
                csv_schema: list of FileColumn objects for this file type
                header_row: an array of the file headers given

            Returns:
                expected field dict {[expected field name]: [header count]}
        """
        self.expected_headers, self.flex_headers = [], []

        # One zeroed counter per schema column, keyed by the cleaned short (machine-readable) column name
        seen_counts = OrderedDict((FieldCleaner.clean_name(column.name_short), 0) for column in csv_schema)

        for name in header_row:
            if name in seen_counts:
                # A header we were expecting: keep it for reading and bump its count
                self.flex_headers.append(None)
                self.expected_headers.append(name)
                seen_counts[name] += 1
            else:
                # Unexpected headers are allowed; only "flex_" ones are remembered, the rest become None so they
                # get skipped when reading
                self.flex_headers.append(name if str(name).startswith("flex_") else None)
                self.expected_headers.append(None)
        return seen_counts
Beispiel #3
0
    def count_and_set_headers(self, csv_schema, header_row):
        """Count the occurrences of every expected field in the header row and populate self.expected_headers and
        self.flex_headers with one entry per header.

        Returns the dict of counts, keyed by the cleaned short name of each column in csv_schema."""
        self.expected_headers = []
        self.flex_headers = []

        # Every schema column starts at zero; keys are the shorter, machine-readable column names
        occurrences = {FieldCleaner.clean_name(column.name_short): 0 for column in csv_schema}

        for header in header_row:
            if header in occurrences:
                self.flex_headers.append(None)
                self.expected_headers.append(header)
                occurrences[header] += 1
                continue

            # Not an expected field: keep it only if it is a flex header, otherwise record None in its place
            is_flex = str(header).startswith("flex_")
            self.flex_headers.append(header if is_flex else None)
            # Unexpected headers are tolerated; None marks the column to be skipped when reading
            self.expected_headers.append(None)
        return occurrences
Beispiel #4
0
def use_long_headers(header_row, long_to_short_dict):
    """Report whether the header row should be treated as using long column names.

    A header counts as long when its cleaned name (via FieldCleaner.clean_name) is a key of long_to_short_dict.
    Returns True only when strictly more than half of the headers count as long."""
    long_count = sum(1 for value in header_row if FieldCleaner.clean_name(value) in long_to_short_dict)
    # "more than half" expressed with integers: twice the matches must exceed the number of headers
    return 2 * long_count > len(header_row)