def read_next_slice(self):
    """Reads raw data for a single slice.

    Local mode (``self._bucket is None``): reads up to
    ``self._number_per_slice`` lines from ``self._file``. When ``readline``
    returns an empty string the file is closed and ``self._eof`` is set.

    Blob mode: byte ranges of ``self._number_per_slice * SIZE_ONE_LINE``
    bytes are downloaded from ``self._blob`` starting at
    ``self._file_pointer`` until enough lines are buffered or the range
    request is rejected (which sets ``self._eof``). Lines left over after
    the slice is filled are kept in ``self._loaded_records`` for the next
    call; its last element may be an incomplete line.

    Returns:
        A list of records or a string representing the error if it applies:
        'Empty file' when a local read produced no records, 'File not found!'
        when the blob download raises NotFound.
    """
    raw_records = []
    records = []
    if self._bucket is None:
        # One record is appended per line read, so len(records) doubles as
        # the line counter.
        while len(records) < self._number_per_slice:
            line = self._file.readline()
            if line == '':
                # readline() only returns '' at end of file.
                self._file.close()
                self._eof = True
                break
            records.append(parse_csv_line(line))
        return records if records else 'Empty file'

    # Enough complete lines are already buffered (the last buffered element
    # may be a partial line, hence the "- 1").
    if len(self._loaded_records) - 1 >= self._number_per_slice:
        records = [
            parse_csv_line(line)
            for line in self._loaded_records[:self._number_per_slice]
        ]
        self._loaded_records = self._loaded_records[self._number_per_slice:]
        return records

    exhausted = False
    while (len(self._loaded_records) + len(raw_records) - 2 <
           self._number_per_slice):
        try:
            end = self._file_pointer + self._number_per_slice * SIZE_ONE_LINE
            pieces = self._blob.download_as_string(
                start=self._file_pointer, end=end).decode().split('\n')
            if raw_records:
                # Consecutive ranges are contiguous, so the tail of the
                # previous chunk and the head of this one are the same line.
                raw_records[-1] += pieces[0]
                raw_records.extend(pieces[1:])
            else:
                raw_records.extend(pieces)
            self._file_pointer = end + 1
        except RequestRangeNotSatisfiable:
            self._eof = True
            exhausted = True
            break
        except NotFound:
            return 'File not found!'

    if raw_records and self._loaded_records:
        # Complete the partial line carried over from the previous call.
        raw_records[0] = self._loaded_records[-1] + raw_records[0]
        self._loaded_records[-1] = ''

    for line in self._loaded_records:
        record = parse_csv_line(line)
        if record:
            records.append(record)

    # Until the data is exhausted the final piece may be cut mid-line, so it
    # must be carried over rather than parsed.
    parse_limit = len(raw_records) if exhausted else len(raw_records) - 1
    carry_from = len(raw_records)
    for index, raw_record in enumerate(raw_records):
        if len(records) >= self._number_per_slice or index >= parse_limit:
            carry_from = index
            break
        record = parse_csv_line(raw_record)
        if record:
            records.append(record)
    # Always replace the buffer: everything previously buffered has been
    # consumed above, so keeping it would return those records again.
    self._loaded_records = raw_records[carry_from:]
    return records
def read_next_slice(self):
    """Reads raw data for a single slice.

    Downloads the next chunk of (SIZE_ONE_LINE * number_per_slice) bytes and
    advances self._file_pointer past the end of the chunk. If the number of
    buffered lines is equal to or greater than self._number_per_slice,
    self._number_per_slice records are returned and the rest are kept in
    self._loaded_records. If there are fewer, another chunk is downloaded.
    This repeats until the range request is rejected, at which point
    self._eof is set.

    Returns:
        A list of records, or None when the blob download raises NotFound.
    """
    raw_records = []
    records = []

    # Enough complete lines are already buffered (the last buffered element
    # may be a partial line, hence the "- 1").
    if len(self._loaded_records) - 1 >= self._number_per_slice:
        records = [
            parse_csv_line(line)
            for line in self._loaded_records[:self._number_per_slice]
        ]
        self._loaded_records = self._loaded_records[self._number_per_slice:]
        return records

    exhausted = False
    while (len(self._loaded_records) + len(raw_records) - 2 <
           self._number_per_slice):
        try:
            end = self._file_pointer + self._number_per_slice * SIZE_ONE_LINE
            pieces = self._blob.download_as_string(
                start=self._file_pointer, end=end).decode().split('\n')
            if raw_records:
                # Consecutive ranges are contiguous, so the tail of the
                # previous chunk and the head of this one are the same line.
                raw_records[-1] += pieces[0]
                raw_records.extend(pieces[1:])
            else:
                raw_records.extend(pieces)
            self._file_pointer = end + 1
        except RequestRangeNotSatisfiable:
            self._eof = True
            exhausted = True
            break
        except NotFound:
            return None

    if raw_records and self._loaded_records:
        # Complete the partial line carried over from the previous call.
        raw_records[0] = self._loaded_records[-1] + raw_records[0]
        self._loaded_records[-1] = ''

    for line in self._loaded_records:
        record = parse_csv_line(line)
        if record:
            records.append(record)

    # Until the data is exhausted the final piece may be cut mid-line, so it
    # must be carried over rather than parsed.
    parse_limit = len(raw_records) if exhausted else len(raw_records) - 1
    carry_from = len(raw_records)
    for index, raw_record in enumerate(raw_records):
        if len(records) >= self._number_per_slice or index >= parse_limit:
            carry_from = index
            break
        record = parse_csv_line(raw_record)
        if record:
            records.append(record)
    # Always replace the buffer: everything previously buffered has been
    # consumed above, so keeping it would return those records again.
    self._loaded_records = raw_records[carry_from:]
    return records
def test_parse_csv_line(self, test_records, test_csv_records):
    """Checks parse_csv_line against the expected record fixtures.

    The CSV text at position ``index`` is split on newlines, each line is
    parsed, and the result must equal the first ``index + 1`` expected
    records.
    """
    for index, csv_text in enumerate(test_csv_records):
        parsed_records = list(map(parse_csv_line, csv_text.split('\n')))
        expected_records = test_records[:index + 1]
        assert parsed_records == expected_records
def read(self):
    """Downloads the slice file and loads its records.

    Each line that parses to a truthy record is appended to
    ``self._records`` under the key ``record[2]``. While ``self._start`` is
    still -1 it is set to ``record[0]`` of the first such record.
    """
    content = self._bucket.blob(self._filename).download_as_string().decode()
    for raw_line in content.split('\n'):
        record = parse_csv_line(raw_line)
        if not record:
            continue
        if self._start == -1:
            self._start = record[0]
        self._records[record[2]].append(record)
def read(self, start, end):
    """Loads records from every slice file, keeping those within a range.

    A record is kept when ``start <= record[0] <= end``; a bound that is
    None is not applied. Kept records are appended to ``self._records``
    under the key ``record[2]``.

    Args:
        start: An int for start time, or None for no lower bound.
        end: An int for end time, or None for no upper bound.
    """
    for slice_path in self._filenames:
        payload = self._bucket.blob(slice_path).download_as_string()
        for raw_line in payload.decode().split('\n'):
            record = parse_csv_line(raw_line)
            if not record:
                continue
            if start is not None and not (start <= record[0]):
                continue
            if end is not None and not (record[0] <= end):
                continue
            self._records[record[2]].append(record)
def read(self):
    """Loads records from the slice file, local or in a bucket.

    Does nothing when ``self._filename`` is None. The file is read from the
    local filesystem when ``self._bucket`` is None, otherwise it is
    downloaded from the bucket. Each line that parses to a truthy record is
    appended to ``self._records`` under the key ``record[2]``. While
    ``self._start`` is still -1 it is set to ``record[0]`` of the first
    such record.
    """
    if self._filename is None:
        return

    if self._bucket is not None:
        payload = self._bucket.blob(self._filename).download_as_string()
        lines = payload.decode().split('\n')
    else:
        with open(self._filename, 'r') as filereader:
            lines = filereader.readlines()

    for raw_line in lines:
        record = parse_csv_line(raw_line)
        if not record:
            continue
        if self._start == -1:
            self._start = record[0]
        self._records[record[2]].append(record)
def test_parse_csv_line_error_cases(self, input_line):
    """Checks that parse_csv_line returns None for the given input line."""
    assert parse_csv_line(input_line) is None