def __init__(self, url=None, header_size=17):
    """
    Constructor for the parser.  Initializes headers and data.

    @param url the url/filepath of the file
    @param header_size number of header lines.  This information is in the
        header already, so it will be removed
    @raise SlocumParseException if no url is given or the header's
        name/unit/dtype rows disagree in length
    """
    if not url:
        raise SlocumParseException('Must provide a filename')

    self.header_size = int(header_size)
    sb = None
    try:
        # Get a byte-string generator for use in the data-retrieval loop
        # (to avoid opening the file every time)
        sb = get_sbuffer(url)
        sb.seek(0)
        # The final 3 header lines (sensor names, units, byte sizes) are
        # consumed separately below.
        for x in xrange(self.header_size - 3):
            line = sb.readline()
            key, value = line.split(':', 1)
            self.header_map[key.strip()] = value.strip()

        # Collect the sensor names & units
        sensor_names = sb.readline().split()
        units = sb.readline().split()

        # Keep track of the intended data type for each sensor.
        # BUGFIX: the original compared tokens with `is` ('d is "1"'),
        # which tests object identity rather than equality and only
        # appeared to work thanks to CPython's interning of short
        # strings.  Use an equality-based lookup table instead; unknown
        # tokens are skipped, matching the original if/elif chain.
        size_to_dtype = {'1': 'byte', '2': 'short',
                         '4': 'float', '8': 'double'}
        dtypes = [size_to_dtype[d]
                  for d in sb.readline().split()
                  if d in size_to_dtype]

        # Validate with an explicit exception instead of `assert`, which
        # is silently stripped when Python runs with -O.
        if not (len(sensor_names) == len(units) == len(dtypes)):
            raise SlocumParseException(
                'Header mismatch: sensor names, units and data types '
                'must have the same length')

        for i in xrange(len(sensor_names)):
            # Rewind so genfromtxt re-reads the full buffer per column.
            sb.seek(0)
            self.sensor_map[sensor_names[i]] = (units[i], dtypes[i])
            dat = np.genfromtxt(fname=sb,
                                skip_header=self.header_size,
                                usecols=i,
                                dtype=dtypes[i],
                                missing_values='NaN')  # ,usemask=True)
            self.data_map[sensor_names[i]] = dat
    finally:
        if sb is not None:
            sb.close()
def test_get_sbuffer_http(self, requests_mock, get_type_mock, StringIO_mock):
    """
    get_sbuffer should return a StringIO when the URL type resolves to
    'http'.  The HTTP request, type detection and StringIO construction
    are all mocked out.
    """
    # Fake a requests Response carrying a directory-listing payload.
    retval = MagicMock(spec=requests.models.Response)
    retval.url = 'http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/'
    retval.content = '<http><body>'\
        '<a href="RDLm_BELM_2012_08_14_1200.ruv">RDLm_BELM_2012_08_14_1200.ruv</a> '\
        '14-Aug-2012 08:42 88K \n<img src="/icons/unknown.gif" alt="[ ]"> '\
        '<a href="RDLm_BELM_2012_08_14_1300.ruv">RDLm_BELM_2012_08_14_1300.ruv</a> '\
        '14-Aug-2012 09:41 90K \n</body></html>'
    requests_mock.return_value = retval
    get_type_mock.return_value = 'http'
    StringIO_mock.return_value = MagicMock(spec=StringIO)
    # assertIsInstance gives a far clearer failure message than
    # assertTrue(isinstance(...)) and is the unittest idiom.
    self.assertIsInstance(get_sbuffer(url=retval.url), StringIO)
def __init__(self, url=None, header_size=17):
    """
    Constructor for the parser.  Initializes headers and data.

    @param url the url/filepath of the file
    @param header_size number of header lines.  This information is in the
        header already, so it will be removed
    @raise SlocumParseException if no url is given or the header's
        name/unit/dtype rows disagree in length
    """
    if not url:
        raise SlocumParseException('Must provide a filename')

    self.header_size = int(header_size)
    sb = None
    try:
        # Get a byte-string generator for use in the data-retrieval loop
        # (to avoid opening the file every time)
        sb = get_sbuffer(url)
        sb.seek(0)
        # The final 3 header lines (sensor names, units, byte sizes) are
        # consumed separately below.
        for x in xrange(self.header_size - 3):
            line = sb.readline()
            key, value = line.split(':', 1)
            self.header_map[key.strip()] = value.strip()

        # Collect the sensor names & units
        sensor_names = sb.readline().split()
        units = sb.readline().split()

        # Keep track of the intended data type for each sensor.
        # BUGFIX: the original compared tokens with `is` ('d is "1"'),
        # which tests object identity rather than equality and only
        # appeared to work thanks to CPython's interning of short
        # strings.  Use an equality-based lookup table instead; unknown
        # tokens are skipped, matching the original if/elif chain.
        size_to_dtype = {'1': 'byte', '2': 'short',
                         '4': 'float', '8': 'double'}
        dtypes = [size_to_dtype[d]
                  for d in sb.readline().split()
                  if d in size_to_dtype]

        # Validate with an explicit exception instead of `assert`, which
        # is silently stripped when Python runs with -O.
        if not (len(sensor_names) == len(units) == len(dtypes)):
            raise SlocumParseException(
                'Header mismatch: sensor names, units and data types '
                'must have the same length')

        for i in xrange(len(sensor_names)):
            # Rewind so genfromtxt re-reads the full buffer per column.
            sb.seek(0)
            self.sensor_map[sensor_names[i]] = (units[i], dtypes[i])
            dat = np.genfromtxt(fname=sb,
                                skip_header=self.header_size,
                                usecols=i,
                                dtype=dtypes[i],
                                missing_values='NaN')  # ,usemask=True)
            self.data_map[sensor_names[i]] = dat
    finally:
        if sb is not None:
            sb.close()
def __init__(self, url):
    """
    Read and parse a CTF v1.00 RUV file from *url*.

    Reads the whole file into memory, validates the %CTF flag and
    version, locates every %TableTypes/%TableEnd pair, then parses the
    header and each table.

    @param url the url/filepath of the file
    @raise RuvParseException if the file cannot be read, is not CTF
        v1.00, lacks table markers, or has mismatched table markers
    """
    fstr = None
    sb = None
    try:
        sb = get_sbuffer(url)
        fstr = sb.read()
    finally:
        if sb is not None:
            sb.close()

    if not fstr:
        raise RuvParseException('Error reading file: {0}'.format(url))

    # Verify that this is a CTF v 1.00 file
    ctf_m = self._ctf_re.search(fstr)
    if ctf_m is None:
        raise RuvParseException(
            '\'{0}\' does not have %CTF flag'.format(url))
    if not ctf_m.group(1) == '1.00':
        raise RuvParseException('\'{0}\' not CTF version 1.00'.format(url))

    # Find the TableTypes - allows discovery of other pieces.
    # BUGFIX: re.finditer returns an iterator, which is ALWAYS truthy,
    # so the original `if not tbl_types:` check could never fire.
    # Materialize to a list so the emptiness test actually works.
    tbl_types = list(self._tbl_type_re.finditer(fstr))
    if not tbl_types:
        raise RuvParseException(
            '\'{0}\' does not contain %TableTypes keywords'.format(url))
    ttps = [(m.group(0), m.group(1), m.start(), m.end())
            for m in tbl_types]

    # Same iterator-truthiness fix for the %TableEnd markers.
    tbl_ends = list(self._tbl_end_re.finditer(fstr))
    if not tbl_ends:
        raise RuvParseException(
            '\'{0}\' does not contain any %TableEnd keywords'.format(url))
    tends = [(m.group(0), m.end()) for m in tbl_ends]

    # There should be the same number of results for ttype & tend.
    # Raise explicitly instead of `assert` (stripped under -O).
    if len(ttps) != len(tends):
        raise RuvParseException(
            '\'{0}\' has mismatched %TableTypes/%TableEnd counts'.format(url))

    header_str = fstr[0:ttps[0][2]]
    self._parse_header(header_str)

    # Parse each table span: from its %TableTypes start to its %TableEnd.
    for ttp, tend in zip(ttps, tends):
        self._parse_table(ttp[1], fstr[ttp[2]:tend[1]])
def __init__(self, url):
    """
    Read and parse a CTF v1.00 RUV file from *url*.

    Reads the whole file into memory, validates the %CTF flag and
    version, locates every %TableTypes/%TableEnd pair, then parses the
    header and each table.

    @param url the url/filepath of the file
    @raise RuvParseException if the file cannot be read, is not CTF
        v1.00, lacks table markers, or has mismatched table markers
    """
    fstr = None
    sb = None
    try:
        sb = get_sbuffer(url)
        fstr = sb.read()
    finally:
        if sb is not None:
            sb.close()

    if not fstr:
        raise RuvParseException('Error reading file: {0}'.format(url))

    # Verify that this is a CTF v 1.00 file
    ctf_m = self._ctf_re.search(fstr)
    if ctf_m is None:
        raise RuvParseException('\'{0}\' does not have %CTF flag'.format(url))
    if not ctf_m.group(1) == '1.00':
        raise RuvParseException('\'{0}\' not CTF version 1.00'.format(url))

    # Find the TableTypes - allows discovery of other pieces.
    # BUGFIX: re.finditer returns an iterator, which is ALWAYS truthy,
    # so the original `if not tbl_types:` check could never fire.
    # Materialize to a list so the emptiness test actually works.
    tbl_types = list(self._tbl_type_re.finditer(fstr))
    if not tbl_types:
        raise RuvParseException('\'{0}\' does not contain %TableTypes keywords'.format(url))
    ttps = [(m.group(0), m.group(1), m.start(), m.end())
            for m in tbl_types]

    # Same iterator-truthiness fix for the %TableEnd markers.
    tbl_ends = list(self._tbl_end_re.finditer(fstr))
    if not tbl_ends:
        raise RuvParseException('\'{0}\' does not contain any %TableEnd keywords'.format(url))
    tends = [(m.group(0), m.end()) for m in tbl_ends]

    # There should be the same number of results for ttype & tend.
    # Raise explicitly instead of `assert` (stripped under -O).
    if len(ttps) != len(tends):
        raise RuvParseException(
            '\'{0}\' has mismatched %TableTypes/%TableEnd counts'.format(url))

    header_str = fstr[0:ttps[0][2]]
    self._parse_header(header_str)

    # Parse each table span: from its %TableTypes start to its %TableEnd.
    for ttp, tend in zip(ttps, tends):
        self._parse_table(ttp[1], fstr[ttp[2]:tend[1]])
def test_get_sbuffer_ftp(self):
    """Requesting a buffer with type='ftp' must raise NotImplementedError."""
    url = 'http://marine.rutgers.edu/cool/maracoos/codar/ooi/radials/BELM/'
    with self.assertRaises(NotImplementedError):
        get_sbuffer(url=url, type='ftp')