def __serialize_morph_tree_node(res: io.IOBase, tn: 'MorphTreeNode') -> None:
    if tn.rules is not None:
        for r in tn.rules:
            MorphSerializeHelper.__serialize_short(res, r._id0_)
    MorphSerializeHelper.__serialize_short(res, 0)
    if tn.reverce_variants is not None:
        for v in tn.reverce_variants:
            MorphSerializeHelper.__serialize_string(res, Utils.ifNotNull(v.tail, ""))
            if v.rule is not None:
                pass
            MorphSerializeHelper.__serialize_short(res, (0 if v.rule is None else v.rule._id0_))
            MorphSerializeHelper.__serialize_short(res, v.coef)
            MorphSerializeHelper.__serialize_morph_rule_variant(res, v)
    MorphSerializeHelper.__serialize_string(res, None)
    if tn.nodes is not None:
        for n in tn.nodes.items():
            MorphSerializeHelper.__serialize_short(res, n[0])
            p0 = res.tell()
            MorphSerializeHelper.__serialize_int(res, 0)
            MorphSerializeHelper.__serialize_morph_tree_node(res, n[1])
            p1 = res.tell()
            res.seek(p0, io.SEEK_SET)
            MorphSerializeHelper.__serialize_int(res, p1)
            res.seek(p1, io.SEEK_SET)
    MorphSerializeHelper.__serialize_short(res, 0xFFFF)
def deserialize(self, stream: io.IOBase, all0_: typing.List['Referent'], sofa: 'SourceOfAnalysis') -> None:
    typ = SerializerHelper.deserializeString(stream)
    cou = SerializerHelper.deserializeInt(stream)
    i = 0
    while i < cou:
        typ = SerializerHelper.deserializeString(stream)
        c = SerializerHelper.deserializeInt(stream)
        id0_ = SerializerHelper.deserializeInt(stream)
        val = None
        if id0_ < 0:
            val = all0_[(-id0_) - 1]
        elif id0_ > 0:
            stream.seek(stream.tell() - 4, io.SEEK_SET)
            val = SerializerHelper.deserializeString(stream)
        self.addSlot(typ, val, False, c)
        i += 1
    cou = SerializerHelper.deserializeInt(stream)
    self.__m_occurrence = list()
    i = 0
    while i < cou:
        a = TextAnnotation._new2691(sofa, self)
        self.__m_occurrence.append(a)
        a.begin_char = SerializerHelper.deserializeInt(stream)
        a.end_char = SerializerHelper.deserializeInt(stream)
        attr = SerializerHelper.deserializeInt(stream)
        if (attr & 1) != 0:
            a.essential_for_occurence = True
        i += 1
def deserialize(self, stream: io.IOBase) -> bool:
    vers = 0
    b = Utils.readByteIO(stream)
    if b == 0xAA:
        b = Utils.readByteIO(stream)
        vers = b
    else:
        stream.seek(stream.tell() - 1, io.SEEK_SET)
    self.__m_sofa = SourceOfAnalysis(None)
    self.__m_sofa.deserialize(stream)
    self.base_language = MorphLang._new5(SerializerHelper.deserializeInt(stream))
    self.__m_entities = list()
    cou = SerializerHelper.deserializeInt(stream)
    i = 0
    while i < cou:
        typ = SerializerHelper.deserializeString(stream)
        r = ProcessorService.createReferent(typ)
        if r is None:
            r = Referent("UNDEFINED")
        self.__m_entities.append(r)
        i += 1
    i = 0
    while i < cou:
        self.__m_entities[i].deserialize(stream, self.__m_entities, self.__m_sofa)
        i += 1
    self.first_token = SerializerHelper.deserializeTokens(stream, self, vers)
    self.__createStatistics()
    return True
def __init__(self, file: IOBase):
    self.chunk_size = CHUNK_SIZE
    line_num = 1
    """
    lines_locations[i] stores the file-offset in bytes of line i for every i
    such that i-1 is a multiple of CHUNK_SIZE. For example, if CHUNK_SIZE == 1000,
    then the keys in the lines_locations dictionary are 1, 1001, 2001, etc.
    """
    self.lines_locations = {}
    while file.readline():
        """
        We iterate over the file and store the locations in the map,
        stepping by CHUNK_SIZE lines.
        """
        location = file.tell()
        if not (line_num - 1) % self.chunk_size:
            self.lines_locations[line_num] = location
        line_num += 1
    self.file = file
    self.file.seek(0)
    self.header = file.readline()
    self.iter_line = 1
    self.length = line_num - 2
    return None
def _find_backwards(
    self,
    src: io.IOBase,
    tok: HighLevelTokenizer,
    text_to_find: str,
) -> int:
    # length of str to check
    str_len = 1024

    # go to end of file
    src.seek(0, io.SEEK_END)
    file_length = src.tell()

    pos = file_length - str_len
    if pos < 1:
        pos = 1

    while pos > 0:
        src.seek(pos)
        bytes_near_eof = "".join([tok._next_char() for _ in range(0, str_len)])
        idx = bytes_near_eof.find(text_to_find)
        if idx >= 0:
            return pos + idx
        pos = pos - str_len + len(text_to_find)

    # raise error
    return -1
@contextlib.contextmanager  # needed so the generator below can be used in a `with` block
def seek_then_rewind(fd: io.IOBase, seek=0) -> typing.IO:
    # remember the current position, optionally seek, and always rewind on exit
    pos = fd.tell()
    if seek is not None:
        fd.seek(seek)
    try:
        yield fd
    finally:
        fd.seek(pos)
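# A minimal usage sketch (not from the original source): with the contextmanager
# decorator in place, seek_then_rewind lets a caller temporarily jump elsewhere in
# a stream and automatically return to the original position afterwards.
import io

fd = io.BytesIO(b"header\nbody\n")
fd.seek(7)                         # caller is somewhere in the middle of the stream
with seek_then_rewind(fd, seek=0) as f:
    print(f.read(6))               # b'header'
print(fd.tell())                   # 7 -- the original position is restored on exit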
def peek(stream: IOBase, chunk_size: int) -> str:
    if hasattr(stream, 'peek'):
        return stream.peek(chunk_size)
    else:
        current_pos = stream.tell()
        result = stream.read(chunk_size)
        stream.seek(current_pos)
        return result
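# A minimal usage sketch (not from the original source): peek returns data without
# advancing the stream, using the native peek() of buffered binary streams when
# available and falling back to tell()/read()/seek() otherwise.
import io

s = io.StringIO("hello world")
print(peek(s, 5))    # 'hello' -- StringIO has no peek(), so the fallback path is used
print(s.read(5))     # 'hello' again, because the position was restored

b = io.BufferedReader(io.BytesIO(b"hello world"))
print(peek(b, 5))    # BufferedReader.peek() may return more than 5 bytes from its buffer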
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    try:
        return self.__parser(stream)
    except ParseError:
        pass
    stream.seek(pos)
    return None
def _readSlx(stream: io.IOBase, blocksize: int, formver: List[int], strict: bool) -> Frame:
    format = formver[0]
    version = formver[1]
    f = FRAME_FORMATS[format]
    s = struct.calcsize(f)
    here = stream.tell()
    bad = 0
    while True:
        buf = stream.read(s)
        if buf == b'':  # EOF
            return None
        if len(buf) < s:
            print(f'This is bad. Only got {len(buf)}/{s} bytes=', buf)
            raise NotEnoughDataError("this is bad")
        data = struct.unpack(f, buf)
        if data[0] == here:  # offset is always first value
            if bad > 1:
                logger.warn('got back at offset: %s', here)
            break
        elif here > 0:
            bad += 1
            if bad == 1:
                logger.warn('unexpected offset %s at location: %s. will try to find next frame', data[0], here)
            if strict:
                raise OffsetError('offset missmatch')
            # jump forward and try to catch next
            here += 1
            stream.seek(here)
            continue
        else:
            raise OffsetError('location does not match expected offset')

    kv = {'headersize': s}
    for i, d in enumerate(FRAME_DEFINITIONS[format]):
        name = d['name']
        if not name == "-":
            kv[name] = data[i]
        if name == 'flags' and FLAG_FORMATS[format]:
            if FLAG_AS_BINARY:
                kv[name] = f'({kv[name]}) {kv[name]:016b}'
            flagform = FLAG_FORMATS[format]
            flags = data[i]
            for k, v in flagform.items():
                kv[k] = flags & v == v

    b = Frame(**kv)
    packetsize = b.packetsize
    if version == 1 and not b.has_tbd1:
        packetsize = b.framesize - 168
    if version == 1 or (version == 2 and b.channel <= 5):
        extra = 168 - s
        stream.read(extra)
    b.packet = stream.read(packetsize)
    return b
def read(self, io_source: io.IOBase) -> "Canvas": """ This method reads a byte stream of canvas operators, and processes them, returning this Canvas afterwards """ io_source.seek(0, os.SEEK_END) length = io_source.tell() io_source.seek(0) canvas_tokenizer = HighLevelTokenizer(io_source) # process content operand_stk = [] instruction_number: int = 0 while canvas_tokenizer.tell() != length: # print("<canvas pos='%d' length='%d' percentage='%d'/>" % ( canvas_tokenizer.tell(), length, int(canvas_tokenizer.tell() * 100 / length))) # attempt to read object obj = canvas_tokenizer.read_object() if obj is None: break # push argument onto stack if not isinstance(obj, CanvasOperatorName): operand_stk.append(obj) continue # process operator instruction_number += 1 operator = self.canvas_operators.get(obj, None) if operator is None: logger.debug("Missing operator %s" % obj) continue if not self.in_compatibility_section: assert len(operand_stk) >= operator.get_number_of_operands() operands: typing.List["CanvasOperator"] = [] # type: ignore [name-defined] for _ in range(0, operator.get_number_of_operands()): operands.insert(0, operand_stk.pop(-1)) # debug operand_str = str([str(x) for x in operands]) if len(operands) == 1 and isinstance(operands[0], list): operand_str = str([str(x) for x in operands[0]]) logger.debug("%d %s %s" % (instruction_number, operator.text, operand_str)) # invoke try: operator.invoke(self, operands) except Exception as e: if not self.in_compatibility_section: raise e # return return self
def __call__(self, stream: io.IOBase):
    values = []
    pos = stream.tell()
    try:
        value = self.__parser(stream)
    except ParseError:
        stream.seek(pos)
        return values
    values.append(value)
    while True:
        pos = stream.tell()
        try:
            self.__sep_parser(stream)
        except ParseError:
            stream.seek(pos)
            break
        value = self.__parser(stream)
        values.append(value)
    return values
def __call__(self, stream: io.IOBase):
    values = []
    for _ in range(self.__min_repeats):
        values.append(self.__parser(stream))
    repeats = self.__min_repeats
    while self.__max_repeats is None or repeats < self.__max_repeats:
        pos = stream.tell()
        try:
            values.append(self.__parser(stream))
            repeats += 1  # count the successful repeat so max_repeats actually bounds the loop
        except ParseError:
            stream.seek(pos)
            break
    return values
def download(
    self,
    file: IOBase,
    response: Response,
    download_version: DownloadVersion,
    session: B2Session,
    encryption: Optional[EncryptionSetting] = None,
):
    """
    Download a file from the given url using parallel download sessions
    and store it in the given download_destination.
    """
    remote_range = self._get_remote_range(response, download_version)
    actual_size = remote_range.size()
    start_file_position = file.tell()
    parts_to_download = list(
        gen_parts(
            remote_range,
            Range(start_file_position, start_file_position + actual_size - 1),
            part_count=self._get_number_of_streams(download_version.content_length),
        )
    )

    first_part = parts_to_download[0]

    hasher = hashlib.sha1()

    with WriterThread(file, max_queue_depth=len(parts_to_download) * 2) as writer:
        self._get_parts(
            response,
            session,
            writer,
            hasher,
            first_part,
            parts_to_download[1:],
            self._get_chunk_size(actual_size),
            encryption=encryption,
        )
    bytes_written = writer.total

    # At this point the hasher has already consumed the data up to the end of the first stream.
    # Consume the rest of the file to complete the hashing process.
    self._finish_hashing(first_part, file, hasher, download_version.content_length)

    return bytes_written, hasher.hexdigest()
def readfile(stream: IOBase, writer: csv.DictWriter, formver: List[int], maxcount: int = 20):
    count = 0
    last = 0
    offset = 8
    last_end = 8
    while True:
        stream.seek(offset)
        buf = stream.read(4)
        if buf == b'' or len(buf) < 4:
            logger.info('no more data.')
            break
        # read data as if offset
        data = struct.unpack('<I', buf)
        if data[0] == offset:
            # yes, we have an equal
            stream.seek(offset)  # go back a bit
            fr = Frame.read(stream, formver)
            told = stream.tell()
            dct = fr.to_dict(formver[0])
            dct['start'] = offset
            dct['end'] = told
            dct['offby'] = offset - last_end
            dct['size'] = offset - last
            dct['asdf'] = [fr.channel, f'({fr.flags}) {fr.flags:016b}']
            writer.writerow(dct)
            # print(
            #     'match at', offset, 'now', dct['now'], 'size', offset - last, 'asd', now-offset-fr.headersize,
            #     fr.to_dict(format=3, fields=['offset', 'index', 'latitude', 'packetsize', 'headersize'])
            # )
            last_end = told
            last = offset
            count += 1
        offset += 1
        if count >= maxcount:
            break
    return count
def read_data(line: str, f: io.IOBase, num_peaks: int) -> Generator[Tuple[float], None, None]:
    mz = intensity = ''
    icol = False  # whether we are in the intensity column or not
    peaks_read = 0
    while True:
        if line == '\n':
            return
        if line[:5].upper() == 'NAME:':
            try:
                f.seek(f.tell() - len(line) - 1, os.SEEK_SET)
            except io.UnsupportedOperation:
                pass
            return
        for char in line:
            if char in '()[]{}':  # ignore brackets
                continue
            elif char in ' \t,;:\n':  # delimiter
                if icol and mz and intensity:
                    yield float(mz), float(intensity)
                    peaks_read += 1
                    if peaks_read >= num_peaks:
                        return
                    mz = intensity = ''
                icol = not icol
            elif not icol:
                mz += char
            else:
                intensity += char
        line = f.readline()
        if not line:
            break
    if icol and mz and intensity:
        yield float(mz), float(intensity)
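# A minimal usage sketch (not from the original parser): feeding read_data an
# in-memory peak list terminated by a blank line. The exact on-disk format is an
# assumption here; only whitespace-delimited "mz intensity" pairs are exercised.
import io

f = io.StringIO("10.5 100\n20.25 300\n\n")
first_line = f.readline()
for mz, intensity in read_data(first_line, f, num_peaks=10):
    print(mz, intensity)   # 10.5 100.0, then 20.25 300.0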
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    exceptions = []
    for choice in self._choices:
        try:
            return choice(stream)
        except ParseError as exception:
            exceptions.append(exception)
            stream.seek(pos)
    furthest_pos = max(e.pos for e in exceptions)
    exceptions = [exception for exception in exceptions if exception.pos == furthest_pos]
    if len(exceptions) == 1:
        raise exceptions[0]
    reasons = (exception.reason for exception in exceptions)
    joined_reasons = '\n'.join('Option %d: %s' % (index, reason) for index, reason in enumerate(reasons))
    raise ParseError(furthest_pos, 'Tried these options:\n%s' % joined_reasons)
def _read_string(fp: IOBase, wide: bool = False) -> str:
    buf, end = b"", -1
    offset = fp.tell()

    # locate string end
    while end == -1:
        chunk = fp.read(64)
        if not chunk:
            raise VDFDecodeError(f"Unterminated cstring (offset: {offset})")
        buf += chunk
        end = buf.find(b"\x00\x00" if wide else b"\x00")
    if wide:
        end += end % 2

    # rewind fp
    fp.seek(end - len(buf) + (2 if wide else 1), 1)

    # decode string
    result = buf[:end]
    return result.decode("utf-16") if wide else result.decode("utf-8", "replace")
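# A minimal usage sketch (not from the original source): _read_string consumes one
# NUL-terminated string per call and leaves the file position just past the
# terminator, so consecutive calls walk through a packed sequence of strings.
import io

fp = io.BytesIO(b"hello\x00world\x00")
print(_read_string(fp))   # 'hello'
print(_read_string(fp))   # 'world'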
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    char = stream.read(1)
    if char != '' and self.__pred(char):
        return char
    raise ParseError(pos, 'Expected any of %s.' % repr(self))
def read(self, io_source: io.IOBase) -> "Canvas": io_source.seek(0, os.SEEK_END) length = io_source.tell() io_source.seek(0) canvas_tokenizer = HighLevelTokenizer(io_source) # process content operand_stk = [] while canvas_tokenizer.tell() != length: # attempt to read object obj = canvas_tokenizer.read_object() if obj is None: break # push argument onto stack if not isinstance(obj, CanvasOperatorName): operand_stk.append(obj) continue # process operator candidate_ops = [ x for x in self.canvas_operators if x.get_text() == str(obj) ] if len(candidate_ops) == 1: operator = candidate_ops[0] if len(operand_stk) < operator.get_number_of_operands(): # if we are in a compatibility section ignore any possible mistake if self.in_compatibility_section: continue raise IllegalGraphicsStateError( message="Unable to execute operator %s. Expected %d arguments, received %d." % ( operator.text, operator.get_number_of_operands(), len(operand_stk), ) ) operands = [] for _ in range(0, operator.get_number_of_operands()): operands.insert(0, operand_stk.pop(-1)) # append if "Instructions" not in self: self["Instructions"] = List().set_parent(self) instruction_number = len(self["Instructions"]) instruction_dictionary = Dictionary() instruction_dictionary["Name"] = operator.get_text() instruction_dictionary["Args"] = List().set_parent( instruction_dictionary ) if len(operands) > 0: for i in range(0, len(operands)): instruction_dictionary["Args"].append(operands[i]) self["Instructions"].append(instruction_dictionary) # debug logger.debug( "%d %s %s" % ( instruction_number, operator.text, str([str(x) for x in operands]), ) ) # invoke try: operator.invoke(self, operands) except Exception as e: if not self.in_compatibility_section: raise e # unknown operator if len(candidate_ops) == 0: # print("Missing OPERATOR %s" % obj) pass # return return self
def main(input_: io.IOBase, account_id: int):
    """
    Train and evaluate Pearson based recommendations.
    """
    logging.basicConfig(
        format="%(asctime)s (%(module)s) %(levelname)s %(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )

    my_tanks = requests.get("http://api.worldoftanks.ru/wot/account/tanks/", params={
        "account_id": account_id,
        "application_id": "demo",
        "fields": "tank_id,statistics",
    }).json()["data"][str(account_id)]
    train_items = {
        int(tank["tank_id"]): tank["statistics"]["wins"] / tank["statistics"]["battles"]
        for tank in my_tanks
        if tank["statistics"]["battles"] >= 30
    }
    test_items = dict(train_items.popitem() for _ in range(len(train_items) // 5))
    logging.info("%d train items. %d test items.", len(train_items), len(test_items))

    similarity_sums = collections.Counter()
    model = collections.Counter()

    for i in itertools.count():
        if i % 1000 == 0:
            logging.info("#%d | input: %.1fMiB", i, input_.tell() / kit.MB)
        stats = kit.read_account_stats(input_)
        if not stats:
            break
        _account_id, tanks = stats
        if _account_id == account_id:
            continue
        other_rated_items = {tank.tank_id: tank.wins / tank.battles for tank in tanks}
        similarity = pearson(train_items, other_rated_items)
        if similarity <= 0.0:
            continue
        for tank_id, my_rating in other_rated_items.items():
            similarity_sums[tank_id] += similarity
            model[tank_id] += similarity * my_rating

    print("Model Predictions:")
    print()
    for chunk in kit.chop(model.items(), 4):
        for tank_id, my_rating in chunk:
            print(
                "%16s: %6.2f" % (
                    encyclopedia.TANKS[tank_id]["short_name_i18n"],
                    100.0 * my_rating / similarity_sums[tank_id],
                ),
                end="",
            )
        print()
    print()

    print("Test Items:")
    print()
    my_total_rating = (
        sum(tank["statistics"]["wins"] for tank in my_tanks) /
        sum(tank["statistics"]["battles"] for tank in my_tanks)
    )
    precise_50 = 0
    precise_my_rating = 0
    for chunk in kit.chop(test_items.items(), 3):
        for tank_id, my_rating in chunk:
            predicted_rating = model[tank_id] / similarity_sums[tank_id]
            print(
                "%16s: %6.2f (%5.2f)" % (
                    encyclopedia.TANKS[tank_id]["short_name_i18n"],
                    100.0 * my_rating,
                    100.0 * predicted_rating,
                ),
                end="",
            )
            precise_50 += (predicted_rating >= 0.5) == (my_rating >= 0.5)
            precise_my_rating += (predicted_rating >= my_total_rating) == (my_rating >= my_total_rating)
        print()
    print()

    print("Precision (> 50.00%%): %.1f." % (100.0 * precise_50 / len(test_items)))
    print("Precision (> %.2f%%): %.1f." % (100.0 * my_total_rating, 100.0 * precise_my_rating / len(test_items)))
def _get_size_till_eof(fobj: io.IOBase) -> int:
    start = fobj.tell()
    fobj.seek(0, io.SEEK_END)
    end = fobj.tell()
    fobj.seek(start)
    return end - start
def _readSlg(fs: io.IOBase, blocksize: int) -> Frame:
    start = fs.tell()
    # show(fs, '--- start')
    # fs.read(1)  # skip 1
    headerlen = 2
    try:
        flags = unreadpack(fs, '<H')[0]
    except EOFError:
        return None
    kv = {'flags': flags}
    f = _FlagsF1(flags)
    # print(f'-- {start}\t| {hex(start)}\t> {flags:016b}')
    # print_attributes(f)
    if flags > 0:
        try:
            # B=byte, H=ushort, h=short, I=uint, i=int, f=float
            if f.has_depth | f.has_surface_depth:
                kv['lower_limit'] = unreadpack(fs, '<f')[0]
            if f.has_depth | f.has_surface_depth:
                kv['water_depth'] = unreadpack(fs, '<f')[0]
            if f.has_temp:
                kv['water_temperature'] = unreadpack(fs, '<f')[0]
            if f.has_waterspeed:
                kv['water_speed'] = unreadpack(fs, '<f')[0]
            if f.has_position:
                data = unreadpack(fs, '<II')
                kv['lon_enc'] = data[0]
                kv['lat_enc'] = data[1]
            if f.has_surface_depth:
                kv['surface_depth'] = unreadpack(fs, '<f')[0]
            if f.has_tob:
                kv['top_of_bottom'] = unreadpack(fs, '<f')[0]
            if f.has_temp2:
                kv['temp2'] = unreadpack(fs, '<f')[0]
            if f.has_temp3:
                kv['temp3'] = unreadpack(fs, '<f')[0]
            if f.has_time:
                kv['time1'] = unreadpack(fs, '<I')[0]
            if f.has_speed_track:
                data = unreadpack(fs, '<ff')
                kv['gps_speed'] = data[0]
                kv['heading'] = data[1]
            if f.test_valid_alititude:
                kv['altitude'] = unreadpack(fs, '<f')[0]
            else:
                data = unreadpack(fs, '<ff')
                kv['other'] = data[0]
                kv['altitude'] = data[1]
            # show(fs, 'before packet')
            kv['packetsize'] = unreadpack(fs, '<H')[0]
        except EOFError:
            return None
    # else:
    #     data = []
    #     print('Unknown flags', flags)
    # show(fs, 'end')
    headerlen = fs.tell() - start
    kv['headerlen'] = headerlen
    calc_size = blocksize - headerlen
    packet_size = kv['packetsize']
    # print(f'headerlen={headerlen}, calc size={calc_size}, pz={packet_size}')
    b = Frame(**kv)
    # print_attributes(b)
    if calc_size != packet_size:
        raise Exception(f'missmatched packetsize. got {packet_size} want {calc_size}')
    b.packet = fs.read(blocksize - headerlen)
    return b
def read(self, io_source: io.IOBase) -> "Canvas": io_source.seek(0, os.SEEK_END) length = io_source.tell() io_source.seek(0) canvas_tokenizer = HighLevelTokenizer(io_source) # process content operand_stk = [] while canvas_tokenizer.tell() != length: # print("<canvas pos='%d' length='%d' percentage='%d'/>" % ( canvas_tokenizer.tell(), length, int(canvas_tokenizer.tell() * 100 / length))) # attempt to read object obj = canvas_tokenizer.read_object() if obj is None: break # push argument onto stack if not isinstance(obj, CanvasOperatorName): operand_stk.append(obj) continue # process operator operator = self.canvas_operators.get(obj, None) if operator is None: logger.debug("Missing operator %s" % obj) continue if not self.in_compatibility_section: assert len(operand_stk) >= operator.get_number_of_operands() operands: typing.List["CanvasOperator"] = [] # type: ignore [name-defined] for _ in range(0, operator.get_number_of_operands()): operands.insert(0, operand_stk.pop(-1)) # append if "Instructions" not in self: self["Instructions"] = List().set_parent(self) # type: ignore [attr-defined] instruction_number = len(self["Instructions"]) instruction_dictionary = Dictionary() instruction_dictionary["Name"] = operator.get_text() instruction_dictionary["Args"] = List().set_parent( # type: ignore [attr-defined] instruction_dictionary ) if len(operands) > 0: for i in range(0, len(operands)): instruction_dictionary["Args"].append(operands[i]) self["Instructions"].append(instruction_dictionary) # debug logger.debug( "%d %s %s" % ( instruction_number, operator.text, str([str(x) for x in operands]), ) ) # invoke try: operator.invoke(self, operands) except Exception as e: if not self.in_compatibility_section: raise e # return return self
def _get_stream_len(stream: io.IOBase) -> int:
    current = stream.tell()
    try:
        return stream.seek(0, os.SEEK_END)
    finally:
        stream.seek(current)
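# A minimal usage sketch (not from the original source): _get_stream_len relies on
# seek() returning the new absolute position (Python 3 io behaviour) and restores
# the caller's position in the finally block.
import io

stream = io.BytesIO(b"0123456789")
stream.seek(4)
print(_get_stream_len(stream))   # 10
print(stream.tell())             # 4 -- position is unchanged for the caller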
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    if stream.read(len(self.__string)) == self.__string:
        return self.__string
    raise ParseError(pos, 'Expected string %s.' % repr(self.__string))
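# A self-contained sketch (stand-ins, not the original library's classes) showing
# how the __call__ methods above cooperate: every combinator remembers the stream
# position with tell() and backtracks with seek() when a branch fails. ParseError,
# Literal and Choice below are simplified assumptions about the surrounding code.
import io


class ParseError(Exception):
    def __init__(self, pos, reason):
        super().__init__(reason)
        self.pos = pos
        self.reason = reason


class Literal:
    def __init__(self, string):
        self.__string = string

    def __call__(self, stream: io.IOBase):
        pos = stream.tell()
        if stream.read(len(self.__string)) == self.__string:
            return self.__string
        raise ParseError(pos, 'Expected string %r.' % self.__string)


class Choice:
    def __init__(self, *choices):
        self._choices = choices

    def __call__(self, stream: io.IOBase):
        pos = stream.tell()
        for choice in self._choices:
            try:
                return choice(stream)
            except ParseError:
                stream.seek(pos)   # backtrack before trying the next alternative
        raise ParseError(pos, 'No alternative matched.')


parser = Choice(Literal("foo"), Literal("bar"))
print(parser(io.StringIO("bar baz")))   # 'bar'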