def __init__(self, file: IOBase):
    """Build a line-number -> byte-offset index over *file*.

    Scans the whole stream once, recording an offset every CHUNK_SIZE lines,
    then rewinds and reads the first line as the header.
    """
    # Stride between indexed lines (module-level constant).
    self.chunk_size = CHUNK_SIZE
    line_num = 1
    """ lines_locations[i] stores the file-offset in bytes of line i for every i such that i-1 is a multiple of CHUNK_SIZE. For example, if CHUNK_SIZE == 1000, then the keys in lines_locations dictionary are 1, 1001, 2001, etc. """
    self.lines_locations = {}
    while file.readline():
        """ We iterate over the file and store in the map the locations doing steps of size CHUNK_SIZE. """
        # NOTE(review): tell() is taken AFTER readline(), so the offset stored
        # under key ``line_num`` is the start of file line ``line_num + 1``.
        # Since line 1 of the file is treated as a header (see below), key k
        # plausibly addresses the k-th *data* line — confirm against whatever
        # lookup code consumes this map.
        location = file.tell()
        if not (line_num - 1) % self.chunk_size:
            self.lines_locations[line_num] = location
        line_num += 1
    self.file = file
    self.file.seek(0)
    # The first line is kept as a header, separate from the indexed data.
    self.header = file.readline()
    self.iter_line = 1  # next line number an iterator over this object reads
    # line_num finished at (total lines + 1); subtracting 2 yields the number
    # of lines excluding the header.
    self.length = line_num - 2
    return None
def read(self, stream: io.IOBase, SAMify: bool = True) -> bool:
    """
    Read one 4-line FASTQ record from *stream* into this object.

    :param stream: The binary file object to read data from
    :param SAMify: Set to true to prepend any data in the description with
        'CO:Z:' to allow for valid SAM records when forwarded by the aligner
    :return: True if successfully read data, False otherwise (EOF)
    """
    line1 = ' '
    while line1.isspace():  # Ignore white space
        line1 = stream.readline().decode('ascii')
        if line1 == '':
            return False
    line2 = stream.readline().decode('ascii')
    if line2 == '':
        return False
    line3 = stream.readline().decode('ascii')
    if line3 == '':
        return False
    line4 = stream.readline().decode('ascii')
    if line4 == '':
        return False
    # '@name desc' -> name / desc; the [:-1] slices strip the trailing newline.
    nameEnd = line1.find(' ')
    self.name = line1[1:nameEnd]
    self.desc1 = line1[nameEnd + 1:-1]
    self.seq = line2[:-1]
    # The '+' separator line may optionally carry a second description.
    nameEnd = line3.find(' ')
    self.desc2 = line3[nameEnd + 1:-1] if nameEnd > 0 else ""
    self.qual = line4[:-1]
    if SAMify:
        if self.desc1:
            self.desc1 = "CO:Z:" + self.desc1
        if self.desc2:
            # BUG FIX: the original assigned this prefix into ``desc1``,
            # clobbering it and leaving ``desc2`` without the prefix.
            self.desc2 = "CO:Z:" + self.desc2
    return True
def read_transactions(input_file: io.IOBase):
    '''
    Read input file transactions in the format
    <sku 1 id >, <sku 2 id>, …. <sku N id>

    Yields one transaction per line as a list of sku ids.
    '''
    print('started reading input')
    line = input_file.readline()
    # BUG FIX: the original tested ``line is not ''`` — an identity comparison
    # with a string literal (interpreter-dependent, and a SyntaxWarning since
    # Python 3.8). readline() returns '' only at EOF, so plain truthiness is
    # the correct equivalent.
    while line:
        # remove break lines and spaces
        yield line.strip().split(TRANSACTION_SPLITTER)
        line = input_file.readline()
    print('finished reading input')
def read_data(line: str, f: io.IOBase, num_peaks: int) -> Generator[Tuple[float], None, None]:
    """Yield up to *num_peaks* ``(mz, intensity)`` float pairs.

    Parses *line* and then successive lines of *f* character by character.
    Brackets are ignored; whitespace/comma/semicolon/colon characters act as
    delimiters alternating between the m/z and intensity columns. Pairs with
    a non-positive m/z are skipped (and do not count toward *num_peaks*).
    """
    mass_buf = ''
    inten_buf = ''
    in_intensity = False  # which column the next digit belongs to
    emitted = 0
    while True:
        for ch in line:
            if ch in '()[]{}':
                # Brackets are purely decorative in this format.
                continue
            if ch in ' \t,;:\n':
                # Delimiter: a completed pair is flushed, then columns swap.
                if in_intensity and mass_buf and inten_buf:
                    mass = float(mass_buf)
                    inten = float(inten_buf)
                    if mass > 0:
                        yield mass, inten
                        emitted += 1
                        if emitted >= num_peaks:
                            return
                    mass_buf = ''
                    inten_buf = ''
                in_intensity = not in_intensity
            elif in_intensity:
                inten_buf += ch
            else:
                mass_buf += ch
        line = f.readline()
        if not line:
            break
    # Flush a trailing pair that was not followed by a delimiter.
    if in_intensity and mass_buf and inten_buf:
        yield float(mass_buf), float(inten_buf)
def tracingCPUMEM(vm: dict, f_input: IOBase, f_output: IOBase):
    """Convert a CPU/MEM trace file into PAJE variable records for one VM.

    :param vm: dict holding at least ``vm["name"]``, the VM to filter on
    :param f_input: trace stream of ';'-separated columns whose first column
        is a timestamp formatted with CPU_MEM_TIME_MASK
    :param f_output: destination stream handed to outputPAJEVariable
    """
    # Checking file first line.
    line = f_input.readline()
    line_cols = line.split(';')
    if (line_cols[1] != CPU_MEM_START):
        sys.exit(f"Wrong file format:'{f_input.name}'.")
    # Timestamp of the start marker; truncated to 30 chars before parsing.
    previous_time = datetime.strptime(line_cols[0][0:30], CPU_MEM_TIME_MASK)
    total_time = timedelta()
    for line in f_input.readlines():
        line_cols = line.split(';')
        # Checking file last line.
        if (line_cols[1] == CPU_MEM_END):
            break
        curr_time = datetime.strptime(line_cols[0][0:30], CPU_MEM_TIME_MASK)
        # Accumulate elapsed time since the previous sample.
        total_time += (curr_time - previous_time)
        tt_seconds = total_time.total_seconds()
        previous_time = curr_time
        # Loop through each vm entry in "line"
        for vm_entry in line_cols[1:]:
            vm_cols = vm_entry.split()
            # If entry corresponds to the current vm, register values.
            if (vm_cols[0] == vm["name"]):
                # NOTE(review): columns 1 and 3 of each entry are assumed to be
                # the MEM and CPU readings respectively — confirm against the
                # trace file format specification.
                outputPAJEVariable(tt_seconds, vm["name"], 'MEM', vm_cols[1], f_output)
                outputPAJEVariable(tt_seconds, vm["name"], 'CPU', vm_cols[3], f_output)
def _parse_header_from_stream(self, stream: io.IOBase) -> Tuple[bool, int]:
    """Attempt to parse a library-level header block from *stream*.

    :param stream: text stream positioned at the start of the library
    :return: ``(header_found, nbytes)`` — ``nbytes`` is the count of header
        bytes consumed (0 when the stream opens directly with a spectrum, or
        when no header is recognized)
    """
    nbytes = 0
    first_line = stream.readline()
    nbytes += len(first_line)
    # Stream starts directly with a spectrum: there is no library header.
    if SPECTRUM_NAME_PRESENT.match(first_line) or START_OF_SPECTRUM_MARKER.match(first_line):
        return True, 0
    elif START_OF_LIBRARY_MARKER.match(first_line):
        match = START_OF_LIBRARY_MARKER.match(first_line)
        version = match.group(1)
        attributes = AttributeManager()
        attributes.add_attribute(FORMAT_VERSION_TERM, version)
        # Consume attribute lines until the first spectrum marker appears.
        line = stream.readline()
        while not (SPECTRUM_NAME_PRESENT.match(line) or START_OF_SPECTRUM_MARKER.match(line)):
            nbytes += len(line)
            # Key-value attribute with a term accession.
            match = key_value_term_pattern.match(line)
            if match is not None:
                d = match.groupdict()
                attributes.add_attribute(
                    d['term'], try_cast(d['value']))
                # NOTE(review): the next line's length is added here AND again
                # at the top of the loop on the next iteration, so nbytes looks
                # double-counted on this path (same for the grouped branch
                # below). Verify against any caller that seeks by this offset.
                line = stream.readline()
                nbytes += len(line)
                continue
            if line.startswith("["):
                # Grouped attribute, e.g. '[1]term=value'.
                match = grouped_key_value_term_pattern.match(line)
                if match is not None:
                    d = match.groupdict()
                    attributes.add_attribute(
                        d['term'], try_cast(d['value']), d['group_id'])
                    attributes.group_counter = int(d['group_id'])
                    line = stream.readline()
                    nbytes += len(line)
                    continue
                else:
                    raise ValueError(
                        f"Malformed grouped attribute {line}")
            elif "=" in line:
                # Plain 'name=value' attribute without a term accession.
                name, value = line.split("=")
                attributes.add_attribute(name, value)
            else:
                raise ValueError(f"Malformed attribute line {line}")
            line = stream.readline()
        # Replace this object's attributes with the freshly parsed set.
        self.attributes.clear()
        self.attributes._from_iterable(attributes)
        return True, nbytes
    return False, 0
def parse(fp: IOBase):
    """Parse only RFC2822 headers from the file pointer *fp*.

    Reads line by line until a blank line (or EOF) terminates the header
    section, enforcing the module's line-length and header-count limits.
    """
    headers = Headers()
    terminators = (b'\r\n', b'\n', b'')
    line = fp.readline(_MAXLINE + 1)
    while True:
        # A line longer than _MAXLINE is rejected outright.
        if len(line) > _MAXLINE:
            raise client.LineTooLong("header line")
        headers.add_header_line(line)
        if len(headers) > _MAXHEADERS:
            raise client.HTTPException("got more than %d headers" % _MAXHEADERS)
        # Blank line or EOF ends the header section.
        if line in terminators:
            return headers
        line = fp.readline(_MAXLINE + 1)
def _read_part_headers(stream: io.IOBase) -> int: total_bytes = 0 while True: line = stream.readline() bytes_read = len(line) total_bytes = total_bytes + bytes_read l_str = line.decode() # print(f"read_part_headers: line({len(line)} bytes): '{l_str}'") if bytes_read < 3: if l_str in ["\r\n", "\n"]: break if bytes_read == 0: raise Exception("unexpected 0-length line") return total_bytes
def text_to_node(self, iio: io.IOBase, parts_list):
    """Read CNL text from *iio* and reconnect this node's children.

    :param iio: text stream positioned at this node's section of a CNL file
    :param parts_list: iterable of parts exposing ``name`` and ``path``
    """
    def find_part(name, path):
        # Prefer lookup by name; fall back to lookup by path. Implicitly
        # returns None when nothing matches (original behavior).
        if name is not None:
            for part in parts_list:
                if part.name == name:
                    return part
        elif path is not None:
            for part in parts_list:
                if part.path == path:
                    return part

    # Guard against '[Child]' appearing before '[Name]'/'[Path]', which would
    # otherwise raise UnboundLocalError.
    name = None
    path = None
    index = 0
    while iio.readable():
        raw = iio.readline()
        if raw == '':
            # BUG FIX: readable() stays True on an exhausted stream, so the
            # original looped forever at EOF. readline() returns '' only at
            # EOF (a blank line is '\n'), so stop here.
            return
        line = raw.strip().split(' ')
        if line[0] == 'None':
            index += 1
        elif line[0] == '[Name]':
            name = line[1]
        elif line[0] == '[Path]':
            path = '' if len(line) == 1 else line[1]
        elif line[0] == '[Child]':
            self.children[index].connect(find_part(name, path))
            # BUG FIX: the original called ``text_to_node(...)`` as a free
            # function — a NameError inside a method. Recurse on the child.
            self.children[index].text_to_node(iio, parts_list)
            index += 1
        elif line[0] == '[Parent]':
            return
        elif line[0] == 'MATERIAL':
            return
def read_data(line: str, f: io.IOBase, num_peaks: int) -> Generator[Tuple[float], None, None]:
    """Yield up to *num_peaks* ``(mz, intensity)`` float pairs.

    Parses *line* and then successive lines of *f*. Stops at a blank line or
    at the start of a new 'NAME:' record, in which case the stream is rewound
    to before that line when it supports seeking.
    """
    mass_buf = ''
    inten_buf = ''
    reading_intensity = False  # which column the next character belongs to
    emitted = 0
    while True:
        if line == '\n':
            # A blank line terminates the peak list.
            return
        if line[:5].upper() == 'NAME:':
            # A new record begins: push this line back for the caller
            # (silently skipped on non-seekable streams).
            try:
                f.seek(f.tell() - len(line) - 1, os.SEEK_SET)
            except io.UnsupportedOperation:
                pass
            return
        for ch in line:
            if ch in '()[]{}':
                # Brackets are purely decorative in this format.
                continue
            if ch in ' \t,;:\n':
                # Delimiter: flush a completed pair, then swap columns.
                if reading_intensity and mass_buf and inten_buf:
                    yield float(mass_buf), float(inten_buf)
                    emitted += 1
                    if emitted >= num_peaks:
                        return
                    mass_buf = ''
                    inten_buf = ''
                reading_intensity = not reading_intensity
            elif reading_intensity:
                inten_buf += ch
            else:
                mass_buf += ch
        line = f.readline()
        if not line:
            break
    # Flush a trailing pair that was not followed by a delimiter.
    if reading_intensity and mass_buf and inten_buf:
        yield float(mass_buf), float(inten_buf)
def header_and_file(infile: IOBase, bytesio=False, only_header=False) -> tuple:
    """opens filename and splits the header from the file.
    returns tuple of header(str) and file(bytes)

    file = file to split
    rootdir = the root of all media storage sent to the default_storage class
    bytesio = return file in bytesio mode instead of as bytes
    only_header = discard the file after reading and return a tuple of the
        header(str) and None

    :raises ValueError: if the stream ends before the END HEADER marker
    """
    infile.seek(0)
    header = b""
    addnext = b""
    # iterate until the end of the header
    while addnext != b"---END HEADER---\n":
        addnext = infile.readline()
        # BUG FIX: at EOF readline() returns b'' forever, so the original
        # spun in an infinite loop when the marker was missing. Fail loudly.
        if addnext == b"":
            raise ValueError("missing '---END HEADER---' marker in stream")
        header += addnext
    # add the rest of the file to retfile
    if not only_header:
        if bytesio:
            retfile = infile
        else:
            retfile = infile.read()
            infile.close()
    return (header.decode("ascii"), None if only_header else retfile)
def _parse_header_from_stream(self, stream: io.IOBase) -> Tuple[bool, int]: first_line = stream.readline() if re.match("Name: ", first_line): return True, 0 return False, 0