Example #1
def procall(fns):
    """
    Process a list of .mdd files
    :param fns: List of .mdd files to process
    :return: list of merged section metadata objects
    """
    # Prepare object to collect data into
    db = mdd_data.mdd_data()
    db.reset()
    sects = db.sects()
    stats = db.stats()
    changed_files = []
    
    # Ingest all sections in all input files
    for fn in fns:
        d = mdd(fn)
        for sect in d.sections:
            sect.glider = d.glider
            sect.time = d.time
            #print fn, sect.node, sect.start, sect.end
            # Basic validation: skip bogus sections (gliders are known to emit them)
            if sect.end <= sect.start:
                #print 'start > end?? node %d port %d start %d end %d' % (sect.node, sect.port, sect.start, sect.end)
                continue
            # Create new or open existing output file
            filename = 'node%dp%d.dat' % (sect.node, sect.port)
            ofn = mdd_config.datafile(filename)
            # keep track of which port 1 node files change so sio block parsing can be done on them
            if sect.port == 1 and filename not in changed_files:
                changed_files.append(filename)
            try:
                of = open(ofn, 'r+b')
            except IOError:
                of = open(ofn, 'wb')
            # If the section starts past the current end of file, pad with zeros
            of.seek(0, 2)
            if sect.start > of.tell():
                of.write('\0' * (sect.start - of.tell()))
            # Write each section out at its address
            of.seek(sect.start)
            of.write(sect.data)
            of.close()
            # Keep metadata for what we have processed
            sects.append(sect)
            stats.accumulate(sect.node, sect.glider, 'bytes', 1 + sect.end - sect.start)
            stats.max(sect.node, sect.glider, 'last', sect.time)
        
    # Merge adjacent sections into one, after sorting by node/port/start
    sects.sort(key=lambda s: (s.node, s.port, s.start))
    n = 0
    while n < len(sects) - 1:
        curr = sects[n]
        # Merge subsequent sections into this one until we can't anymore
        while n < len(sects) - 1:
            next_sect = sects[n + 1]
            if curr.node != next_sect.node or curr.port != next_sect.port:
                break
            elif curr.end < next_sect.start - 1:
                break
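            # Reaching here means the sections touch or overlap (ends are
            # inclusive): e.g. [0, 99] and [100, 150] merge into [0, 150]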
            curr.end = max(curr.end, next_sect.end)
            curr.time = max(curr.time, next_sect.time)
            del sects[n + 1]
        n += 1
    #print '\n'.join([repr(s) for s in sects])
    db.save()

    # Parse the node files created above, locate complete sio blocks,
    # and copy them into instrument-specific files
    sio_parse = SioParse()

    # loop over each node file that has changed with this run and parse it
    for changed_file in changed_files:
        sio_parse.parse_file(changed_file)

    # save the sio parse database
    sio_parse.save()

    return sects
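
A minimal driver sketch for the example above; the surrounding module is assumed (not shown here) to import mdd_data, mdd_config, the mdd reader class and SioParse, so only the call pattern is illustrated:

import glob

if __name__ == '__main__':
    # Feed every .mdd file in the working directory through procall()
    input_files = sorted(glob.glob('*.mdd'))
    sections = procall(input_files)
    for s in sections:
        print('node %d port %d: offsets %d-%d' % (s.node, s.port, s.start, s.end))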
Example #2
# mdp_config.py:
# Read and parse gliders / nodes configuration file
# 08jul2013 [email protected] Initial

import mdd_config
import os
from xml.dom.minidom import parse

xml_config_fn = mdd_config.datafile('ooi.xml')


# All info about a node in an object (just data)
class nodeInfo(object):
    def __init__(self, n, posn, name, rate):
        self.id = int(n)
        self.lat = posn[0]
        self.lon = posn[1]
        self.depth = posn[2]
        self.name = name
        self.rate = rate


# Shortcut to get subelement from a node
def getElementData(doc, name):
    return str(doc.getElementsByTagName(name)[0].childNodes[0].data)


# Read xml config file into usable glider and node lists
def getSysConfig():
    config = parse(xml_config_fn).getElementsByTagName('modemConfig')[0]
    deployments = config.getElementsByTagName('deployment')
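
The snippet cuts off inside getSysConfig(), but getElementData() can be exercised on its own. A self-contained sketch; the XML below is invented for illustration and is not the real ooi.xml layout:

from xml.dom.minidom import parseString

doc = parseString('<deployment><name>node42</name><rate>9600</rate></deployment>')
print(getElementData(doc, 'name'))  # -> node42
print(getElementData(doc, 'rate'))  # -> 9600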
Example #3
    def parse_file(self, file_name):
        """
        Find any complete sio blocks in the input file and copy them to their respective output files
        :param file_name: The input file name to parse
        """
        # get the current file state from the dictionary, or initialize
        # it if it doesn't exist
        file_state = self.sio_db.get_file_state(file_name)
        if file_state is None:
            file_state = self.sio_db.init_file_state(file_name)

        # insert the file index at the end of the file name before the extension
        file_out_start = file_name[:-4] + '_' + str(file_state[StateKey.OUTPUT_INDEX])
        file_out_end = file_name[-4:]

        # get the full input file path and find the current file size
        full_path_in = mdd_config.datafile(file_name)
        file_len = os.stat(full_path_in).st_size

        # update the file size and unprocessed data based on the input file length
        if file_state[StateKey.UNPROCESSED_DATA] is None:
            file_state[StateKey.UNPROCESSED_DATA] = [[0, file_len]]
            file_state[StateKey.FILE_SIZE] = file_len
        else:
            self.update_state_file_length(file_state, file_len)

        # increment output index each time we read this file
        file_state[StateKey.OUTPUT_INDEX] += 1

        fid_in = open(full_path_in, 'rb')

        newly_processed_blocks = []
        # loop over unprocessed blocks
        for unproc in file_state[StateKey.UNPROCESSED_DATA]:
            # read the next unprocessed data block from the file
            fid_in.seek(unproc[START_IDX])
            block_len = unproc[END_IDX] - unproc[START_IDX]
            data_block = fid_in.read(block_len)

            # loop and find each sio header in this unprocessed block
            for match in SIO_HEADER_MATCHER.finditer(data_block):

                # get the file string associated with this instrument ID from the sio header
                file_type = ID_MAP.get(match.group(SIO_HEADER_GROUP_ID))
                # insert the file type into the file name
                full_path_out = mdd_config.datafile(file_out_start + '.' + file_type + file_out_end)

                # open the output file in append mode, creating if it doesn't exist
                fid_out = open(full_path_out, 'a+')

                # get length of data packet carried within this sio header
                data_len = int(match.group(SIO_HEADER_GROUP_DATA_LENGTH), 16)
                # end index relative to the unprocessed block
                end_block_idx = match.end(0) + data_len + 1
                # end index relative to the match
                end_match_idx = SIO_HEADER_LENGTH - 1 + data_len

                match_block = data_block[match.start(0):end_block_idx]
                orig_len = len(match_block)
                # replace escape modem chars
                match_block = match_block.replace(b'\x18\x6b', b'\x2b')
                match_block = match_block.replace(b'\x18\x58', b'\x18')
                # store how many chars were replaced in this block for updating the state
                n_replaced = orig_len - len(match_block)
                # if characters were replaced, extend the block so it still
                # spans the expected packet length
                match_block += data_block[end_block_idx:end_block_idx + n_replaced]

                if end_match_idx < len(match_block) and match_block[end_match_idx] == SIO_BLOCK_END:
                    # found the matching end of the packet, this block is complete,
                    # write it to output file
                    fid_out.write(match_block[:end_match_idx + 1])

                    # adjust the start and end indices to be relative to the file rather than the block
                    start_file_idx = match.start(0) + unproc[START_IDX]
                    end_file_idx = end_block_idx + n_replaced + unproc[START_IDX]
                    newly_processed_blocks.append([start_file_idx, end_file_idx])

                fid_out.close()

        # combine adjacent blocks first so there are fewer to loop over
        newly_processed_blocks = SioParse._combine_adjacent_packets(newly_processed_blocks)

        # remove the processed blocks from the unprocessed file state
        for new_block in newly_processed_blocks:
            self.update_processed_file_state(file_state, new_block[START_IDX], new_block[END_IDX])

        fid_in.close()
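
The escape handling above is the subtle part: the modem encodes 0x2b as 0x18 0x6b and 0x18 as 0x18 0x58, so unescaping shrinks the block, and the parser appends n_replaced extra bytes so the block still spans the expected packet length. A standalone sketch of that arithmetic on invented bytes:

raw = b'AB\x18\x6bCD\x18\x58EF\x03XY'       # invented stream, not real SIO data
block = raw[:10]                            # nominal packet span, escapes included
orig_len = len(block)
block = block.replace(b'\x18\x6b', b'\x2b')
block = block.replace(b'\x18\x58', b'\x18')
n_replaced = orig_len - len(block)          # 2: one byte saved per escape pair
block += raw[10:10 + n_replaced]            # extend so the end byte is included
assert block == b'AB\x2bCD\x18EF\x03X' and block[8:9] == b'\x03'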
Example #4
# mdd_data.py:
# Persistent data object for mdd data processor
# 08jul2013 [email protected]  Initial
# 04sep2013 [email protected]  only one database object, added offsets

import mdd_config
import os
import pickle

# Where we save data
dbfile = mdd_config.datafile('mdd.pckl')
db = None

class matrix(object):
    def __init__(self):
        self.x = {}
        self.y = set()
    
    # Increment value at (xkey, ykey, tag) by increment
    def accumulate(self, xkey, ykey, tag, increment=1):
        if xkey not in self.x:
            self.x[xkey] = {}
        column = self.x[xkey]
        if ykey not in column:
            column[ykey] = {}
        loc = column[ykey]
        if ykey not in self.y:
            self.y.add(ykey)
        if tag not in loc:
            loc[tag] = 0
        loc[tag] += increment
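
A quick usage sketch for the matrix counter above (keys and values invented), mirroring how procall() feeds it node/glider byte counts:

stats = matrix()
stats.accumulate(1, 'glider_a', 'bytes', 512)
stats.accumulate(1, 'glider_a', 'bytes', 256)
stats.accumulate(2, 'glider_b', 'bytes', 128)
print(stats.x[1]['glider_a']['bytes'])   # -> 768
print(sorted(stats.y))                   # -> ['glider_a', 'glider_b']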
Example #5
SIO_HEADER_REGEX += b'[0-9A-Za-z]'          # MFLM Processing Flag (coded value)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{8})'     # POSIX Timestamp of Controller (hex)
SIO_HEADER_REGEX += b'_'                    # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{2})'     # Block Number (hex)
SIO_HEADER_REGEX += b'_'                    # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{4})'     # CRC Checksum (hex)
SIO_HEADER_REGEX += b'\x02'                 # End of SIO Header (binary data follows)
SIO_HEADER_MATCHER = re.compile(SIO_HEADER_REGEX)

SIO_HEADER_LENGTH = 34

# sio header group match index
SIO_HEADER_GROUP_ID = 1           # Instrument ID
SIO_HEADER_GROUP_DATA_LENGTH = 2  # Number of Data Bytes

sio_db_file = mdd_config.datafile('sio.pckl')

# constants for accessing unprocessed data
START_IDX = 0
END_IDX = 1

# map of instrument ID to file type to place instrument data in
ID_MAP = {
    'AD': 'adcps',
    'CT': 'ctdmo',
    'CO': 'ctdmo',
    'DO': 'dosta',
    'FL': 'flort',
    'PH': 'phsen',
    'CS': 'status',
    'PS': 'status',
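
For orientation, two small invented examples of how these constants are used by parse_file() above: the hex data-length group is decoded with int(s, 16), and unmapped instrument IDs fall out of ID_MAP.get() as None:

data_len = int('01ff', 16)                        # hex data-length field -> 511
end_match_idx = SIO_HEADER_LENGTH - 1 + data_len  # -> 544, where \x03 should sit
print(ID_MAP.get('CT'))                           # -> ctdmo
print(ID_MAP.get('ZZ'))                           # -> None (no file type mapped)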
Example #6
    def parse_file(self, file_name):
        """
        Find any complete sio blocks in the input file and copy them to their respective output files
        :param file_name: The input file name to parse
        """
        # get the current file state from the dictionary, or initialize
        # it if it doesn't exist
        file_state = self.sio_db.get_file_state(file_name)
        if file_state is None:
            file_state = self.sio_db.init_file_state(file_name)

        # insert the file index at the end of the file name before the extension
        file_out_start = file_name[:-4] + '_' + str(
            file_state[StateKey.OUTPUT_INDEX])
        file_out_end = file_name[-4:]

        # get the full input file path and find the current file size
        full_path_in = mdd_config.datafile(file_name)
        file_len = os.stat(full_path_in).st_size

        # update the file size and unprocessed data based on the input file length
        if file_state[StateKey.UNPROCESSED_DATA] is None:
            file_state[StateKey.UNPROCESSED_DATA] = [[0, file_len]]
            file_state[StateKey.FILE_SIZE] = file_len
        else:
            self.update_state_file_length(file_state, file_len)

        fid_in = open(full_path_in, 'rb')

        newly_processed_blocks = []
        # loop over unprocessed blocks
        for unproc in file_state[StateKey.UNPROCESSED_DATA]:
            # read the next unprocessed data block from the file
            fid_in.seek(unproc[START_IDX])
            block_len = unproc[END_IDX] - unproc[START_IDX]
            data_block = fid_in.read(block_len)

            # loop and find each sio header in this unprocessed block
            for match in SIO_HEADER_MATCHER.finditer(data_block):

                # get the file string associated with this instrument ID from the sio header
                file_type = ID_MAP.get(match.group(SIO_HEADER_GROUP_ID))

                # get length of data packet carried within this sio header
                data_len = int(match.group(SIO_HEADER_GROUP_DATA_LENGTH), 16)
                # end index relative to the unprocessed block
                end_block_idx = match.end(0) + data_len + 1
                # end index relative to the match
                end_match_idx = SIO_HEADER_LENGTH - 1 + data_len

                match_block = data_block[match.start(0):end_block_idx]
                orig_len = len(match_block)
                # replace escape modem chars
                match_block = match_block.replace(b'\x18\x6b', b'\x2b')
                match_block = match_block.replace(b'\x18\x58', b'\x18')
                # store how many chars were replaced in this block for updating the state
                n_replaced = orig_len - len(match_block)
                # if characters were replaced, extend the block so it still
                # spans the expected packet length
                match_block += data_block[end_block_idx:end_block_idx + n_replaced]

                if end_match_idx < len(match_block) and match_block[end_match_idx] == SIO_BLOCK_END:
                    # found the matching end of the packet, this block is complete

                    # include controller / instrument number in file name so different instruments are in
                    # different files
                    ctrl_id = match.group(SIO_HEADER_GROUP_CTRL_ID)
                    file_out = file_out_start + '.' + file_type + '_' + ctrl_id + file_out_end

                    # insert the file type into the file name
                    full_path_out = mdd_config.datafile(file_out)

                    # open the output file in append mode, creating if it doesn't exist
                    fid_out = open(full_path_out, 'a+')

                    # write it to output file
                    fid_out.write(match_block[:end_match_idx + 1])

                    # adjust the start and end indices to be relative to the file rather than the block
                    start_file_idx = match.start(0) + unproc[START_IDX]
                    end_file_idx = end_block_idx + n_replaced + unproc[START_IDX]
                    newly_processed_blocks.append([start_file_idx, end_file_idx])

                    fid_out.close()

        # check for newly processed blocks
        if newly_processed_blocks:
            # increment output index if we have found new data to parse in this file
            file_state[StateKey.OUTPUT_INDEX] += 1

            # combine adjacent blocks first so there are fewer to loop over
            newly_processed_blocks = SioParse._combine_adjacent_packets(
                newly_processed_blocks)

            # remove the processed blocks from the unprocessed file state
            for new_block in newly_processed_blocks:
                self.update_processed_file_state(file_state,
                                                 new_block[START_IDX],
                                                 new_block[END_IDX])

        fid_in.close()
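
_combine_adjacent_packets() is referenced but not shown in these examples. A plausible sketch of such a combiner, assuming [start, end) ranges sorted by start (an illustration only, not the project's actual implementation; END_IDX is the constant from the neighboring examples):

def _combine_adjacent_packets(packets):
    """Illustrative only: merge sorted [start, end) ranges that touch or overlap."""
    combined = []
    for start, end in packets:
        if combined and start <= combined[-1][END_IDX]:
            combined[-1][END_IDX] = max(combined[-1][END_IDX], end)
        else:
            combined.append([start, end])
    return combined

print(_combine_adjacent_packets([[0, 10], [10, 25], [40, 50]]))  # -> [[0, 25], [40, 50]]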
Example #7
SIO_HEADER_REGEX += b'([0-9a-fA-F]{8})'  # POSIX Timestamp of Controller (hex)
SIO_HEADER_REGEX += b'_'  # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{2})'  # Block Number (hex)
SIO_HEADER_REGEX += b'_'  # Spacer (0x5F)
SIO_HEADER_REGEX += b'([0-9a-fA-F]{4})'  # CRC Checksum (hex)
SIO_HEADER_REGEX += b'\x02'  # End of SIO Header (binary data follows)
SIO_HEADER_MATCHER = re.compile(SIO_HEADER_REGEX)

SIO_HEADER_LENGTH = 34

# sio header group match index
SIO_HEADER_GROUP_ID = 1  # Instrument ID
SIO_HEADER_GROUP_DATA_LENGTH = 3  # Number of Data Bytes
SIO_HEADER_GROUP_CTRL_ID = 2  # controller and instrument number

sio_db_file = mdd_config.datafile('sio.pckl')

# constants for accessing unprocessed data
START_IDX = 0
END_IDX = 1

# map of instrument ID to file type to place instrument data in
ID_MAP = {
    'AD': 'adcps',
    'CT': 'ctdmo',
    'CO': 'ctdmo',
    'DO': 'dosta',
    'FL': 'flort',
    'PH': 'phsen',
    'CS': 'status',
    'PS': 'status',
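
To make the renamed-output scheme in Example #6 concrete, the filename pieces assemble like this (all values invented):

file_name = 'node58p1.dat'
output_index = 3
file_out_start = file_name[:-4] + '_' + str(output_index)   # node58p1_3
file_out_end = file_name[-4:]                                # .dat
file_type = ID_MAP.get('CT')                                 # ctdmo
ctrl_id = '12345'                                            # from the sio header
print(file_out_start + '.' + file_type + '_' + ctrl_id + file_out_end)
# -> node58p1_3.ctdmo_12345.dat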