コード例 #1
0
ファイル: datafileloader.py プロジェクト: dayunxiang/SHIP
    def setupRowCollection():
        """Setup the RowDataCollection for loading the data into.
        """
        # First entry doesn't want to have a comma in front when formatting.
        row_collection = RowDataCollection()
        types = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]

        # Do the first entry separately because it has a different format string
        row_collection.addToCollection(
            do.StringData(0, format_str='{0}', default=''))
        for i, t in enumerate(types, 1):
            if t == 0:
                row_collection.addToCollection(
                    do.StringData(i, format_str=', {0}', default=''))
            else:
                row_collection.addToCollection(
                    do.FloatData(i,
                                 format_str=', {0}',
                                 no_of_dps=3,
                                 default=0.00))

        # Add a couple of extra rows to the row_collection for tracking the
        # data in the file.
        row_collection.addToCollection(do.IntData('row_no'))

        return row_collection
コード例 #2
0
ファイル: datafileloader.py プロジェクト: dayunxiang/SHIP
def readTmfFile(datafile):
    """Loads the contents of the Materials CSV file referenced by datafile.
    
    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.
    
    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        
    Return:
        tuple: rowdatacollection, comment_lines(list).
        
    See Also:
        :class:'rowdatacollection'.
    """
    value_separator = ','
    comment_types = ['#', '!']
    tmf_enum = dataobj.TmfEnum()

    path = datafile.absolutePath()
    value_order = range(11)

    row_collection = RowDataCollection()
    row_collection.addToCollection(do.IntData(0, format_str=None, default=''))
    for i in range(1, 11):
        row_collection.addToCollection(
            do.FloatData(i, format_str=', {0}', default='', no_of_dps=3))

    # Keep track of any comment lines and the row numbers as well
    row_collection.addToCollection(
        do.StringData('comment', format_str=' ! {0}', default=''))
    row_collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    contents = []
    logger.info('Loading data file contents from disc - %s' % (path))
    contents = _loadFileFromDisc(path)

    # Stores the comments found in the file
    comment_lines = []

    # Loop through the contents list loaded from file line-by-line.
    first_data_line = False
    row_count = 0
    for i, line in enumerate(contents, 0):

        comment = hasCommentOnlyLine(line, comment_types)
        if comment or comment == '':
            comment_lines.append(comment)

        # If we have a line that isn't a comment or a blank then it is going
        # to contain materials entries.
        else:
            comment_lines.append(None)
            row_collection = _loadRowData(line, row_count, row_collection,
                                          tmf_enum.ITERABLE, comment_types,
                                          value_separator)
            row_count += 1

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(0, len(value_order)):
        row_collection.getDataObject(value_order[i]).has_changed = False

    return row_collection, comment_lines
コード例 #3
0
ファイル: datafileloader.py プロジェクト: dayunxiang/SHIP
def readMatSubfile(main_datafile, filename, header_list, args_dict):
    """
    """
    value_separator = ','
    comment_types = ['#', '!']
    mat_subfile_enum = dataobj.SubfileMatEnum()
    path = os.path.join(main_datafile.root, filename)
    root = main_datafile.root

    header1 = 'None'
    header2 = 'None'
    if len(header_list) > 0:
        header1 = header_list[0]
        if len(header_list) > 1:
            header2 = header_list[1]

    def _scanfile(filepath):
        """Scans the file before we do any loading to identify the contents.
        Need to do this because the file can be setup in so many way that it
        becomes a headache to work it out in advance. Better to take a little
        bit of extra processing time and do some quick checks first.
         
        Arguments:
            file_path (str): the path to the subfile.
        
        Return:
            tuple:
                 list: booleans with whether the column contains
                       data that we want or not.
                 int:  length of the cols list.
                 list: containing all of the first row column data
                 int:  first row with usable data on.
        """
        logger.debug('Scanning Materials file - %s' % (filepath))

        with open(filepath, 'rb') as csv_file:

            csv_file = csv.reader(csv_file)

            cols = []
            head_list = []
            start_row = -1
            for i, row in enumerate(csv_file, 0):
                if "".join(row).strip() == "":
                    break

                for j, col in enumerate(row, 0):
                    if i == 0:
                        cols.append(False)
                        head_list = row
                    elif uuf.isNumeric(col):
                        cols[j] = True
                        if start_row == -1:
                            start_row = i
                    elif cols[j] == True:
                        break

        return cols, len(cols), head_list, start_row

    def _loadHeadData(row, row_collection, col_length):
        """
        """
        new_row = [None] * 12

        comment_indices, length = uuf.findSubstringInList('!', row)
        comment_lines.append(None)

        head1_location = -1
        head2_location = -1
        row_length = len(row)
        for i in range(0, col_length):
            if i < row_length:
                entry = row[i].strip()
                if entry == header1:
                    head1_location = i
                if entry == header2:
                    head2_location = i
                row_collection._addValue('actual_header', entry)

        return row_collection, head1_location, head2_location

    def _loadRowData(row, row_count, row_collection, comment_lines, col_length,
                     start_row):
        """Loads the data in a specific row of the file.
        
        Args:
            row(list): containing the row data.
            row_count(int): the current row number.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        # Any lines that aren't headers, but are above the first row to contain
        # actual data will be stored as comment lines
        if row_count < start_row:
            comment_lines.append(row)
            return row_collection, comment_lines
        else:
            comment_lines.append(None)

        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in range(col_length):
            if i < len(row):
                row_collection._addValue(i, row[i])

        return row_collection, comment_lines

    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rb') as csv_file:
            csv_file = csv.reader(csv_file)

            # Do a quick check of the file setup
            cols, col_length, head_list, start_row = _scanfile(path)

            # First entry doesn't want to have a comma in front when formatting.
            # but all of the others do.
            row_collection = RowDataCollection()
            row_collection.addToCollection(
                do.FloatData(0, format_str=' {0}', default='', no_of_dps=6))
            for i in range(1, len(cols)):
                if cols[i] == True:
                    row_collection.addToCollection(
                        do.FloatData(i,
                                     format_str=', {0}',
                                     default='',
                                     no_of_dps=6))
                else:
                    row_collection.addToCollection(
                        do.StringData(i, format_str=', {0}', default=''))

            row_collection.addToCollection(do.StringData('actual_header',
                                                         format_str='{0}',
                                                         default=''),
                                           index=0)
            row_collection.addToCollection(
                do.IntData('row_no', format_str=None, default=''))

            # Stores the comments found in the file
            comment_lines = []
            first_data_line = False
            # Loop through the contents list loaded from file line-by-line.
            for i, line in enumerate(csv_file, 0):

                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append([comment, i])

                # If we have a line that isn't a comment or a blank then it is going
                # to contain materials entries.
                else:
                    # First non-comment is the headers
                    if first_data_line == False:
                        first_data_line = True
                        row_collection, head1_loc, head2_loc = _loadHeadData(
                            line, row_collection, col_length)
                    else:
                        row_collection, comment_lines = _loadRowData(
                            line, i, row_collection, comment_lines, col_length,
                            start_row)

                    row_collection._addValue('row_no', i)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    path_holder = filetools.PathHolder(path, root)
    mat_sub = dataobj.DataFileSubfileMat(path_holder, row_collection,
                                         comment_lines, path_holder.filename,
                                         head1_loc, head2_loc)
    return mat_sub
コード例 #4
0
ファイル: datafileloader.py プロジェクト: dayunxiang/SHIP
def readMatCsvFile(datafile, args_dict={}):
    """Loads the contents of the Materials CSV file referenced by datafile.
    
    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.
    
    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        
    Return:
        tuple: rowdatacollection, comment_lines(list).
        
    See Also:
        :class:'rowdatacollection'.
    """
    value_seperator = ','
    comment_types = ['#', '!']
    csv_enum = dataobj.MatCsvEnum()
    subfile_details = {}

    def _loadHeadData(row, row_collection):
        """
        """
        new_row = [None] * 12

        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        new_row[0] = row[0]
        new_row[1] = row[1]
        new_row[9] = row[2]
        new_row[11] = row[3]

        row_length = len(new_row)
        for i, v in enumerate(new_row):
            if i < row_length:
                row_collection._addValue('actual_header', new_row[i])

        return row_collection

    def _disectEntry(col_no, entry, new_row):
        """Breaks the row values into the appropriate object values.
        
        The materials file can have Excel style sub-values. i.e. it can have
        seperate columns defined within a bigger one. This function will break
        those values down into a format usable by the values initiated in the
        rowdatacollection.
        
        Args:
            col_no(int): the current column number.
            entry(string): the value of the current column.
            new_row(list): the row values to update.
            
        Return:
            list containing the updated row values.
        
        Note:
            This isn't very nice. Need to clean it up and find a better, safer
            way of dealing with breaking the row data up. It may be excess work
            but perhaps creating an xml converter could work quite will and
            make dealing with the file a bit easier?
        """
        made_change = False

        # Put in ID and Hazard as normal
        if col_no == 0:
            new_row[0] = entry
        elif col_no == 11:
            new_row[11] = entry
        # Possible break up Manning's entry further
        elif col_no == 1:
            # See if there's more than one value in the Manning's category.
            splitval = entry.split(',')

            # If there is and it's numeric then it's a single value for 'n'
            if len(splitval) == 1:
                if uuf.isNumeric(splitval[0]):
                    new_row[1] = splitval[0]

                # Otherwise it's a filename. These can be further separated
                # into two column headers to read from the sub files.
                else:
                    strsplit = splitval[0].split('|')
                    if len(strsplit) == 1:
                        subfile_details[strsplit[0].strip()] = []
                        new_row[6] = strsplit[0].strip()
                    elif len(strsplit) == 2:
                        subfile_details[strsplit[0]] = [strsplit[1].strip()]
                        new_row[6] = strsplit[0].strip()
                        new_row[7] = strsplit[1].strip()
                    else:
                        subfile_details[strsplit[0]] = [
                            strsplit[1].strip(), strsplit[2].strip()
                        ]
                        new_row[6] = strsplit[0].strip()
                        new_row[7] = strsplit[1].strip()
                        new_row[8] = strsplit[2].strip()

            # If there's more than one value then it must be the Manning's
            # depth curve values (N1, Y1, N2, Y2).
            else:
                new_row[2] = splitval[0]
                new_row[3] = splitval[1]
                new_row[4] = splitval[2]
                new_row[5] = splitval[3]

        # Finally grab the infiltration parameters (IL, CL)
        elif col_no == 2:
            splitval = entry.split(',')
            new_row[9] = splitval[0]
            new_row[10] = splitval[1]

        return new_row

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.
        
        Args:
            row(list): containing the row data.
            row_count(int): the current row number.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])
        new_row = [None] * 12

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in csv_enum.ITERABLE:
            if i < len(row):
                new_row = _disectEntry(i, row[i], new_row)

        for val, item in enumerate(new_row):
            row_collection._addValue(val, item)

    # First entry doesn't want to have a comma in front when formatting.
    row_collection = RowDataCollection()
    types = [1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0]

    # Do the first entry separately because it has a different format string
    row_collection.addToCollection(
        do.StringData(0, format_str='{0}', default=''))
    for i, t in enumerate(types, 1):
        if t == 0:
            row_collection.addToCollection(
                do.StringData(i, format_str=', {0}', default=''))
        else:
            row_collection.addToCollection(
                do.FloatData(i, format_str=', {0}', default='', no_of_dps=3))

    # Add a couple of extra rows to the row_collection for tracking the
    # data in the file.
    row_collection.addToCollection(
        do.StringData('comment', format_str='{0}', default=''))
    row_collection.addToCollection(
        do.StringData('actual_header', format_str='{0}', default=''))
    row_collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    path = datafile.absolutePath()
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rb') as csv_file:
            csv_file = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            first_data_line = False
            line_count = 0

            try:
                # Loop through the contents list loaded from file line-by-line.
                for i, line in enumerate(csv_file, 0):

                    comment = hasCommentOnlyLine(''.join(line), comment_types)
                    if comment or comment == '':
                        comment_lines.append(comment)

                    # If we have a line that isn't a comment or a blank then it is going
                    # to contain materials entries.
                    else:
                        # First non-comment is the headers
                        if first_data_line == False:
                            first_data_line = True
                            _loadHeadData(line, row_collection)
                        else:
                            _loadRowData(line, i, row_collection)

                        row_collection._addValue('row_no', line_count)
                        line_count += 1
                        comment_lines.append(None)
            except IndexError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n'
                    + path)
                raise IndexError(
                    'File is not correctly formatted for a Materials.csv file')
            except AttributeError:
                logger.error(
                    'This file is not setup/formatted correctly for a Materials.CSV file:\n'
                    + path)
                raise AttributeError(
                    'File is not correctly formatted for a Materials.csv file')

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(0, len(csv_enum.ITERABLE)):
        row_collection.getDataObject(i).has_changed = False

    return row_collection, comment_lines, subfile_details
コード例 #5
0
ファイル: datafileloader.py プロジェクト: dayunxiang/SHIP
def readBcFile(datafile, args_dict={}):
    """Loads the contents of the BC Database file refernced by datafile.
    
    Loads the data from the file referenced by the given TuflowFile object into
    a :class:'rowdatacollection' and a list of comment only lines.
    
    Args:
        datafile(TuflowFile): TuflowFile object with file details.
        
    Return:
        tuple: rowdatacollection, comment_lines(list).
        
    See Also:
        :class:'rowdatacollection'.
    """
    value_seperator = ','
    comment_types = ['#', '!']
    bc_enum = dataobj.BcEnum()
    bc_event_data = args_dict

    def _checkHeaders(row, required_headers):
        """Checks that any required headers can be found.
        
        Reviews the headers in the header row of the csv file to ensure that
        any specifically needed named column headers exist.
        
        Args:
            row(list): columns headers.
            required_headers(list): column names that must be included.
        
        Return:
            list if some headers not found of False otherwise.
        """
        # Check what we have in the header row
        head_check = True
        for r in required_headers:
            if not r in row:
                head_check = False
        if not head_check:
            logger.warning('Required header (' + r + ') not' +
                           'found in file: ' + path)
        return head_check

    def _loadHeadData(row, row_collection, required_headers):
        """Loads the column header data.
        
        Adds the file defined names for the headers to the rowdatacollection.
        
        Args:
            row(list): containing the row data.
            row_collection(rowdatacollection): for updating.
            required_headers(list): column names that must exist.
        
        Return:
            rowdatacollection: updated with header row details.
        """
        row_length = len(row)
        head_check = _checkHeaders(row, required_headers)
        for i, v in enumerate(bc_enum.ITERABLE):
            if i < row_length:
                row_collection._addValue('actual_header', row[i])

        return row_collection

    def _loadRowData(row, row_count, row_collection):
        """Loads the data in a specific row of the file.
        
        Args:
            row(list): containing the row data.
            row_count(int): the current row number.
            required_headers(list): column names that must exist.

        Return:
            rowdatacollection: updated with header row details.
        """
        if '!' in row[-1] or '#' in row[-1]:
            row_collection._addValue('comment', row[-1])

        # Add the row data in the order that it appears in the file
        # from left to right.
        for i in bc_enum.ITERABLE:
            if i < len(row):
                row_collection._addValue(i, row[i])

        return row_collection

    # Initialise the RowDataOjectCollection object with currect setup
    row_collection = RowDataCollection()
    for i, val in enumerate(bc_enum.ITERABLE):
        if i == 0:
            row_collection.addToCollection(
                do.StringData(i, format_str='{0}', default=''))
        else:
            row_collection.addToCollection(
                do.StringData(i, format_str=', {0}', default=''))

    row_collection.addToCollection(do.StringData('actual_header',
                                                 format_str=', {0}',
                                                 default=''),
                                   index=0)
    row_collection.addToCollection(
        do.IntData('row_no', format_str=None, default=''))

    path = datafile.absolutePath()
    required_headers = ['Name', 'Source']
    try:
        logger.info('Loading data file contents from disc - %s' % (path))
        with open(path, 'rU') as csv_file:
            csv_file = csv.reader(csv_file)

            # Stores the comments found in the file
            comment_lines = []
            first_data_line = False
            row_count = 0
            # Loop through the contents list loaded from file line-by-line.
            for i, line in enumerate(csv_file, 0):

                comment = hasCommentOnlyLine(''.join(line), comment_types)
                if comment or comment == '':
                    comment_lines.append(comment)

                # If we have a line that isn't a comment or a blank then it is going
                # to contain materials entries.
                else:
                    # First non-comment is the headers
                    if first_data_line == False:
                        first_data_line = True
                        row_collection = _loadHeadData(line, row_collection,
                                                       required_headers)
                    else:
                        row_collection = _loadRowData(line, i, row_collection)
                        row_collection._addValue('row_no', row_count)
                        row_count += 1

                    comment_lines.append(None)

    except IOError:
        logger.warning('Cannot load file - IOError')
        raise IOError('Cannot load file at: ' + path)

    # Just need to reset the has_changed variable because it will have been
    # set to True while loading everything in.
    for i in range(0, len(bc_enum.ITERABLE)):
        row_collection.dataObject(i).has_changed = False

    return row_collection, comment_lines