Example #1
0
    def read_next_slice(self):
        """Reads raw data for a single slice.

        Without a bucket, up to self._number_per_slice lines are read from the
        already opened local file. With a bucket, the blob is downloaded in
        byte ranges of (SIZE_ONE_LINE * number_per_slice) bytes until enough
        lines are buffered; lines that are downloaded but not returned are
        kept in self._loaded_records for the next call.

        Returns:
            A list of records or a string representing the error if it applies.
        """
        records = []

        if self._bucket is None:
            # Local file: parse line by line; an empty read means end of file.
            # Parse results are appended as-is (no filtering) in this branch.
            counter = 0
            while counter < self._number_per_slice:
                line = self._file.readline()
                if line == '':
                    self._file.close()
                    self._eof = True
                    break
                records.append(parse_csv_line(line))
                counter += 1
            return records if records else 'Empty file'

        # The last buffered entry may be an unfinished line, hence the "- 1".
        if len(self._loaded_records) - 1 >= self._number_per_slice:
            records = [
                parse_csv_line(line)
                for line in self._loaded_records[:self._number_per_slice]
            ]
            self._loaded_records = self._loaded_records[self._number_per_slice:]
            return records

        raw_records = []
        reached_end = False
        # "- 2": the buffered tail is glued to the first downloaded line, and
        # the last downloaded entry is itself a line still being assembled.
        while len(self._loaded_records) + len(
                raw_records) - 2 < self._number_per_slice:
            try:
                end = self._file_pointer + self._number_per_slice * SIZE_ONE_LINE
                # NOTE(review): decoding an arbitrary byte range can fail if a
                # multi-byte character straddles the range boundary.
                chunk_lines = self._blob.download_as_string(
                    start=self._file_pointer, end=end).decode().split('\n')
            except RequestRangeNotSatisfiable:
                self._eof = True
                reached_end = True
                break
            except NotFound:
                return 'File not found!'
            self._file_pointer = end + 1
            if raw_records:
                # The previous chunk stopped mid-line: join the two halves
                # instead of treating them as two separate lines.
                raw_records[-1] += chunk_lines[0]
                raw_records.extend(chunk_lines[1:])
            else:
                raw_records = chunk_lines

        # Build one ordered list of lines, joining the buffered fragment with
        # the beginning of the freshly downloaded data.
        pending = self._loaded_records
        if raw_records:
            if pending:
                raw_records[0] = pending[-1] + raw_records[0]
                pending = pending[:-1]
            pending = pending + raw_records

        # Unless the end of the blob was reached, the final entry is not a
        # full line yet and must wait for the next download.
        if reached_end:
            complete, unfinished = pending, []
        else:
            complete, unfinished = pending[:-1], pending[-1:]

        consumed = 0
        for line in complete:
            if len(records) >= self._number_per_slice:
                break
            consumed += 1
            record = parse_csv_line(line)
            if record:
                records.append(record)
        # Always replace the buffer so returned lines are never served twice.
        self._loaded_records = complete[consumed:] + unfinished
        return records
Example #2
0
    def read_next_slice(self):
        """Reads raw data for a single slice.

        Download next chunk of size (SIZE_ONE_LINE * number_per_slice) bytes
        and update the self._file_pointer to the end of the chunk.
        If the number of lines is equal to or greater than
        self._number_per_slice, return self._number_per_slice records and keep
        the rest in self._loaded_records. If less, download again.
        Repeat this process until the whole data is downloaded.

        Returns:
            A list of records, or None if the download raises NotFound.
        """
        records = []

        # The last buffered entry may be an unfinished line, hence the "- 1".
        if len(self._loaded_records) - 1 >= self._number_per_slice:
            records = [
                parse_csv_line(line)
                for line in self._loaded_records[:self._number_per_slice]
            ]
            self._loaded_records = self._loaded_records[self.
                                                        _number_per_slice:]
            return records

        raw_records = []
        reached_end = False
        # "- 2": the buffered tail is glued to the first downloaded line, and
        # the last downloaded entry is itself a line still being assembled.
        while len(self._loaded_records) + len(
                raw_records) - 2 < self._number_per_slice:
            try:
                end = self._file_pointer + self._number_per_slice * SIZE_ONE_LINE
                # NOTE(review): decoding an arbitrary byte range can fail if a
                # multi-byte character straddles the range boundary.
                chunk_lines = self._blob.download_as_string(
                    start=self._file_pointer,
                    end=end).decode().split('\n')
            except RequestRangeNotSatisfiable:
                self._eof = True
                reached_end = True
                break
            except NotFound:
                return None
            self._file_pointer = end + 1
            if raw_records:
                # The previous chunk stopped mid-line: join the two halves
                # instead of treating them as two separate lines.
                raw_records[-1] += chunk_lines[0]
                raw_records.extend(chunk_lines[1:])
            else:
                raw_records = chunk_lines

        # Build one ordered list of lines, joining the buffered fragment with
        # the beginning of the freshly downloaded data.
        pending = self._loaded_records
        if raw_records:
            if pending:
                raw_records[0] = pending[-1] + raw_records[0]
                pending = pending[:-1]
            pending = pending + raw_records

        # Unless the end of the blob was reached, the final entry is not a
        # full line yet and must wait for the next download.
        if reached_end:
            complete, unfinished = pending, []
        else:
            complete, unfinished = pending[:-1], pending[-1:]

        consumed = 0
        for line in complete:
            if len(records) >= self._number_per_slice:
                break
            consumed += 1
            record = parse_csv_line(line)
            if record:
                records.append(record)
        # Always replace the buffer so returned lines are never served twice.
        self._loaded_records = complete[consumed:] + unfinished
        return records
Example #3
0
 def test_parse_csv_line(self, test_records, test_csv_records):
     """Checks parse_csv_line against progressively longer CSV fixtures.

     The fixture at position i is split on newlines and every line is
     parsed; the result must equal the first i + 1 expected records.
     """
     for index, csv_text in enumerate(test_csv_records):
         expected = test_records[:index + 1]
         actual = [parse_csv_line(row) for row in csv_text.split('\n')]
         assert actual == expected
Example #4
0
    def read(self):
        """Loads every record of the slice file from the bucket.

        Downloads the blob named self._filename, parses it line by line and
        appends each parsed record to self._records under the key taken from
        the record's third field. While self._start still holds -1, it is set
        to the first field of the first record that parses.
        """
        payload = self._bucket.blob(self._filename).download_as_string()
        for raw_line in payload.decode().split('\n'):
            parsed = parse_csv_line(raw_line)
            if not parsed:
                # Lines that do not yield a record are skipped.
                continue
            if self._start == -1:
                self._start = parsed[0]
            self._records[parsed[2]].append(parsed)
    def read(self, start, end):
        """Loads records from every slice, keeping those within the range.

        Each path in self._filenames is downloaded from the bucket and parsed
        line by line. A record is stored in self._records (keyed by its third
        field) when its first field lies between start and end, inclusive.

        Args:
            start: An int for start time; None disables the lower bound.
            end: An int for end time; None disables the upper bound.
        """
        for slice_path in self._filenames:
            payload = self._bucket.blob(slice_path).download_as_string()
            for raw_line in payload.decode().split('\n'):
                parsed = parse_csv_line(raw_line)
                if not parsed:
                    continue
                if not (start is None or start <= parsed[0]):
                    continue
                if not (end is None or parsed[0] <= end):
                    continue
                self._records[parsed[2]].append(parsed)
 def read(self):
     """Loads records from the slice file, local or in the bucket.

     Does nothing when self._filename is None. Without a bucket the file is
     opened from the local filesystem; otherwise the blob of that name is
     downloaded. Each parsed record is appended to self._records under the
     key taken from its third field, and self._start is set from the first
     parsed record while it still holds -1.
     """
     if self._filename is None:
         return

     if self._bucket is None:
         with open(self._filename, 'r') as filereader:
             raw_lines = filereader.readlines()
     else:
         payload = self._bucket.blob(self._filename).download_as_string()
         raw_lines = payload.decode().split('\n')

     for raw_line in raw_lines:
         parsed = parse_csv_line(raw_line)
         if not parsed:
             continue
         if self._start == -1:
             self._start = parsed[0]
         self._records[parsed[2]].append(parsed)
    def test_parse_csv_line_error_cases(self, input_line):
        """Checks that parse_csv_line returns None for an error-case input."""
        assert parse_csv_line(input_line) is None