Esempio n. 1
0
 def __init__(self, file: IOBase):
     """Index *file* by line offsets to support chunked random access.

     lines_locations[i] stores a file offset for every line number i such
     that i - 1 is a multiple of CHUNK_SIZE. For example, if
     CHUNK_SIZE == 1000, the keys are 1, 1001, 2001, etc.

     :param file: a seekable file object; line 1 is treated as a header.
     """
     self.chunk_size = CHUNK_SIZE
     line_num = 1
     self.lines_locations = {}
     # Walk the file once, recording an offset every CHUNK_SIZE lines.
     # (The original used bare string literals as comments here; one was
     # re-evaluated on every loop iteration.)
     # NOTE(review): tell() is called *after* readline(), so the offset
     # stored under key i is the position just past line i, not its start
     # — confirm that readers account for this.
     while file.readline():
         location = file.tell()
         if not (line_num - 1) % self.chunk_size:
             self.lines_locations[line_num] = location
         line_num += 1
     self.file = file
     self.file.seek(0)
     # Consume line 1 as the header so iteration starts at the first data line.
     self.header = file.readline()
     self.iter_line = 1
     # line_num ended one past the last line; subtract the header too.
     self.length = line_num - 2
Esempio n. 2
0
    def read(self, stream: io.IOBase, SAMify: bool = True) -> bool:
        """
        Read one FASTQ-style 4-line record into this object.

        :param stream: The file object to read data from
        :param SAMify: Set to true to prepend any data in the description with 'CO:Z:' to allow for valid SAM records when forwarded by the aligner
        :return: True if successfully read data, False otherwise
        """
        line1 = ' '
        while line1.isspace():  # Ignore white space; EOF returns '' which is not whitespace and exits the loop
            line1 = stream.readline().decode('ascii')
        if line1 == '':
            return False
        line2 = stream.readline().decode('ascii')
        if line2 == '':
            return False
        line3 = stream.readline().decode('ascii')
        if line3 == '':
            return False
        line4 = stream.readline().decode('ascii')
        if line4 == '':
            return False

        # Line 1 is "@name description": split at the first space.
        nameEnd = line1.find(' ')
        self.name = line1[1:nameEnd]
        self.desc1 = line1[nameEnd + 1:-1]
        self.seq = line2[:-1]
        # Line 3 is "+ description"; the description is optional.
        nameEnd = line3.find(' ')
        self.desc2 = line3[nameEnd + 1:-1] if nameEnd > 0 else ""
        self.qual = line4[:-1]

        if SAMify:
            # Prefix each non-empty description as a SAM comment field.
            if self.desc1:
                self.desc1 = "CO:Z:" + self.desc1
            if self.desc2:
                # Fix: this previously assigned to desc1, clobbering it with
                # desc2's value and leaving desc2 without its prefix.
                self.desc2 = "CO:Z:" + self.desc2
        return True
Esempio n. 3
0
def read_transactions(input_file: io.IOBase):
    '''
    Read input file transactions in the format <sku 1 id >, <sku 2 id>, …. <sku N id>

    Yields one list of SKU ids per input line, split on TRANSACTION_SPLITTER.
    '''
    print('started reading input')
    line = input_file.readline()
    # Fix: was `while line is not ''`, an identity comparison with a string
    # literal — unreliable and a SyntaxWarning on modern Pythons.
    while line != '':
        # remove break lines and spaces
        yield line.strip().split(TRANSACTION_SPLITTER)
        line = input_file.readline()
    print('finished reading input')
Esempio n. 4
0
def read_data(line: str, f: io.IOBase,
              num_peaks: int) -> Generator[Tuple[float, float], None, None]:
    """Yield (mz, intensity) float pairs parsed from *line* and then from *f*.

    The text is scanned character by character: brackets are ignored, any of
    " \\t,;:\\n" acts as a delimiter toggling between the m/z and intensity
    columns, and all other characters accumulate into the current number.
    Peaks with non-positive m/z are skipped but still count toward
    *num_peaks*.

    :param line: first line to parse (already read from *f*)
    :param f: file object supplying continuation lines
    :param num_peaks: stop after this many peaks have been consumed
    """
    mz = intensity = ''
    icol = False  # whether we are in intensity column or not
    peaks_read = 0

    while True:
        for char in line:
            if char in '()[]{}':  # Ignore brackets
                continue
            elif char in ' \t,;:\n':  # Delimiter
                if icol and mz and intensity:
                    mz_f = float(mz)
                    intensity_f = float(intensity)
                    if mz_f > 0:
                        yield mz_f, intensity_f
                    peaks_read += 1
                    if peaks_read >= num_peaks:
                        return
                    mz = intensity = ''
                icol = not icol
            elif not icol:
                mz += char
            else:
                intensity += char

        line = f.readline()
        if not line:
            break

    # Flush a trailing pair at EOF. Fix: apply the same m/z > 0 filter as
    # the main loop — the original yielded unfiltered values here.
    if icol and mz and intensity:
        mz_f = float(mz)
        intensity_f = float(intensity)
        if mz_f > 0:
            yield mz_f, intensity_f
Esempio n. 5
0
def tracingCPUMEM(vm: dict, f_input: IOBase, f_output: IOBase):
    """Emit PAJE CPU/MEM variable records for one VM from a trace file.

    The first line of *f_input* must carry the CPU_MEM_START marker in its
    second ';'-separated column. For every following sample line (until the
    CPU_MEM_END marker) the elapsed time is accumulated and, for each entry
    belonging to vm["name"], a MEM and a CPU record are written to *f_output*.
    """
    # Validate the header line first.
    header_cols = f_input.readline().split(';')
    if header_cols[1] != CPU_MEM_START:
        sys.exit(f"Wrong file format:'{f_input.name}'.")

    previous_time = datetime.strptime(header_cols[0][0:30], CPU_MEM_TIME_MASK)
    total_time = timedelta()

    for record in f_input.readlines():
        cols = record.split(';')

        # The end marker terminates the trace.
        if cols[1] == CPU_MEM_END:
            break

        # Accumulate wall-clock time between consecutive samples.
        curr_time = datetime.strptime(cols[0][0:30], CPU_MEM_TIME_MASK)
        total_time += curr_time - previous_time
        tt_seconds = total_time.total_seconds()
        previous_time = curr_time

        # Each remaining column holds one VM entry: "<name> <mem> ... <cpu>".
        for vm_entry in cols[1:]:
            fields = vm_entry.split()
            if fields[0] == vm["name"]:
                outputPAJEVariable(tt_seconds, vm["name"], 'MEM', fields[1],
                                   f_output)
                outputPAJEVariable(tt_seconds, vm["name"], 'CPU', fields[3],
                                   f_output)
Esempio n. 6
0
 def _parse_header_from_stream(self, stream: io.IOBase) -> Tuple[bool, int]:
     """Try to parse a library-level header block from *stream*.

     Returns a tuple ``(header_found, bytes_consumed)``:
     - ``(True, 0)`` when the stream starts directly with a spectrum
       (no library header to consume);
     - ``(True, nbytes)`` when a library header was parsed into
       ``self.attributes``;
     - ``(False, 0)`` when neither marker matches the first line.
     """
     nbytes = 0
     first_line = stream.readline()
     nbytes += len(first_line)
     # A spectrum starting immediately means there is no library header.
     if SPECTRUM_NAME_PRESENT.match(first_line) or START_OF_SPECTRUM_MARKER.match(first_line):
         return True, 0
     elif START_OF_LIBRARY_MARKER.match(first_line):
         match = START_OF_LIBRARY_MARKER.match(first_line)
         version = match.group(1)
         attributes = AttributeManager()
         attributes.add_attribute(FORMAT_VERSION_TERM, version)
         line = stream.readline()
         # Consume attribute lines until the first spectrum begins.
         # NOTE(review): the byte accounting looks inconsistent — the two
         # `continue` branches add len(line) for the freshly read line, and
         # the loop top adds it again on the next iteration, so those lines
         # appear to be counted twice; confirm against how callers use
         # nbytes before changing anything.
         while not (SPECTRUM_NAME_PRESENT.match(line) or START_OF_SPECTRUM_MARKER.match(line)):
             nbytes += len(line)
             match = key_value_term_pattern.match(line)
             if match is not None:
                 # CV-term style "term=value" attribute.
                 d = match.groupdict()
                 attributes.add_attribute(
                     d['term'], try_cast(d['value']))
                 line = stream.readline()
                 nbytes += len(line)
                 continue
             if line.startswith("["):
                 # Grouped attribute, e.g. "[1]term=value".
                 match = grouped_key_value_term_pattern.match(line)
                 if match is not None:
                     d = match.groupdict()
                     attributes.add_attribute(
                         d['term'], try_cast(d['value']), d['group_id'])
                     attributes.group_counter = int(d['group_id'])
                     line = stream.readline()
                     nbytes += len(line)
                     continue
                 else:
                     raise ValueError(
                         f"Malformed grouped attribute {line}")
             elif "=" in line:
                 # Fallback plain "key=value" line.
                 # NOTE(review): split("=") raises if the value itself
                 # contains '='; maxsplit=1 may be intended. The value also
                 # keeps its trailing newline here — confirm.
                 name, value = line.split("=")
                 attributes.add_attribute(name, value)
             else:
                 raise ValueError(f"Malformed attribute line {line}")
             line = stream.readline()
         # Replace this object's attributes with the freshly parsed set.
         self.attributes.clear()
         self.attributes._from_iterable(attributes)
         return True, nbytes
     return False, 0
Esempio n. 7
0
 def parse(fp: IOBase):
     """Parses only RFC2822 headers from a file pointer.

     Reads at most _MAXLINE + 1 bytes per line, raising on oversized lines
     or on more than _MAXHEADERS accumulated headers. Stops after a blank
     line (CRLF/LF) or EOF; that terminator line is still recorded.
     """
     headers = Headers()
     while True:
         raw = fp.readline(_MAXLINE + 1)
         if len(raw) > _MAXLINE:
             raise client.LineTooLong("header line")
         headers.add_header_line(raw)
         if len(headers) > _MAXHEADERS:
             raise client.HTTPException("got more than %d headers" %
                                        _MAXHEADERS)
         if raw in (b'', b'\n', b'\r\n'):
             break
     return headers
Esempio n. 8
0
def _read_part_headers(stream: io.IOBase) -> int:
    total_bytes = 0
    while True:
        line = stream.readline()
        bytes_read = len(line)
        total_bytes = total_bytes + bytes_read

        l_str = line.decode()
        # print(f"read_part_headers: line({len(line)} bytes): '{l_str}'")
        if bytes_read < 3:
            if l_str in ["\r\n", "\n"]:
                break
            if bytes_read == 0:
                raise Exception("unexpected 0-length line")

    return total_bytes
Esempio n. 9
0
def text_to_node(self, iio: io.IOBase, parts_list):
    '''
    Read a CNL (connection node list) block from *iio* and connect this
    node's children to the matching parts in *parts_list*.

    Recognised records (first space-separated token of each line):
      None     -> skip the current child slot
      [Name]   -> remember a part name for the next [Child]
      [Path]   -> remember a part path (may be empty)
      [Child]  -> connect the child at the current index and recurse into it
      [Parent] / MATERIAL -> end of this node's block
    '''

    def find_part(name, path):
        # Prefer lookup by name; fall back to lookup by path.
        if name is not None:
            for part in parts_list:
                if part.name == name:
                    return part
        elif path is not None:
            for part in parts_list:
                if part.path == path:
                    return part

    # Fix: name/path previously started unbound, so a [Child] record seen
    # before [Name]/[Path] raised NameError.
    name = None
    path = None
    index = 0
    while iio.readable():
        raw = iio.readline()
        if not raw:
            # Fix: readable() stays True at EOF and readline() keeps
            # returning '', so the original spun forever here.
            return
        line = raw.strip().split(' ')
        if line[0] == 'None':
            index += 1

        elif line[0] == '[Name]':
            name = line[1]

        elif line[0] == '[Path]':
            path = '' if len(line) == 1 else line[1]

        elif line[0] == '[Child]':
            self.children[index].connect(find_part(name, path))
            text_to_node(self.children[index], iio, parts_list)
            index += 1

        elif line[0] == '[Parent]':
            return

        elif line[0] == 'MATERIAL':
            return
Esempio n. 10
0
def read_data(line: str, f: io.IOBase, num_peaks: int) -> Generator[Tuple[float], None, None]:
    """Yield (mz, intensity) float pairs parsed from *line* and then from *f*.

    Scans character by character: brackets are ignored, any of " \\t,;:\\n"
    toggles between the m/z and intensity columns, everything else
    accumulates into the current number. Stops at a blank line, at the
    start of the next record ("NAME:" prefix), after *num_peaks* pairs,
    or at EOF.
    """
    mz = intensity = ''
    icol = False  # whether we are in intensity column or not
    peaks_read = 0

    while True:
        # A bare newline terminates the peak list.
        if line == '\n':
            return

        if line[:5].upper() == 'NAME:':
            # Start of the next record: rewind so the caller can re-read it.
            # NOTE(review): the extra -1 in the seek target, and mixing
            # tell()/seek() arithmetic with readline() on a text stream,
            # look fragile — confirm against the file mode callers use.
            try:
                f.seek(f.tell()-len(line)-1, os.SEEK_SET)
            except io.UnsupportedOperation:
                pass  # non-seekable stream: best effort, just stop
            return

        for char in line:
            if char in '()[]{}':  # Ignore brackets
                continue
            elif char in ' \t,;:\n':  # Delimiter
                if icol and mz and intensity:
                    yield float(mz), float(intensity)
                    peaks_read += 1
                    if peaks_read >= num_peaks:
                        return
                    mz = intensity = ''
                icol = not icol
            elif not icol:
                mz += char
            else:
                intensity += char

        line = f.readline()
        if not line:
            break

    # Flush a trailing pair when the input ended mid-line (no final delimiter).
    if icol and mz and intensity:
        yield float(mz), float(intensity)
Esempio n. 11
0
def header_and_file(infile: IOBase, bytesio=False, only_header=False) -> tuple:
    """Split *infile* into its header and the remaining payload.

    Returns a tuple of (header, file) where the header is the ASCII-decoded
    text up to and including the "---END HEADER---" marker line.

    infile = binary file object to split (read from position 0)
    bytesio = return the file object itself (positioned after the header)
              instead of reading it into bytes
    only_header = discard the payload and return (header, None)

    Raises ValueError if the end-of-header marker is never found.
    """
    infile.seek(0)
    header = b""
    addnext = b""

    # Accumulate lines (marker included) until the end of the header.
    while addnext != b"---END HEADER---\n":
        addnext = infile.readline()
        if not addnext:
            # Fix: readline() returns b"" forever at EOF, so a missing
            # marker previously made this loop spin indefinitely.
            raise ValueError("end-of-header marker '---END HEADER---' not found")
        header += addnext
    # Add the rest of the file to retfile.
    if not only_header:
        if bytesio:
            retfile = infile
        else:
            retfile = infile.read()
            infile.close()

    return (header.decode("ascii"), None if only_header else retfile)
Esempio n. 12
0
 def _parse_header_from_stream(self, stream: io.IOBase) -> Tuple[bool, int]:
     first_line = stream.readline()
     if re.match("Name: ", first_line):
         return True, 0
     return False, 0