def _from_io(self, source: IO):
    """
    Loads an existing JVM ClassFile from any file-like object.
    """
    read = source.read

    if unpack('>I', source.read(4))[0] != ClassFile.MAGIC:
        raise ValueError('invalid magic number')

    # The version is swapped on disk to (minor, major), so swap it back.
    self.version = unpack('>HH', source.read(4))[::-1]

    self._constants.unpack(source)

    # ClassFile access_flags, see section #4.1 of the JVM specs.
    self.access_flags.unpack(read(2))

    # The CONSTANT_Class indexes for "this" class and its superclass.
    # Interfaces are a simple list of CONSTANT_Class indexes.
    self._this, self._super, interfaces_count = unpack('>HHH', read(6))
    self._interfaces = unpack(
        f'>{interfaces_count}H',
        read(2 * interfaces_count)
    )

    self.fields.unpack(source)
    self.methods.unpack(source)
    self.attributes.unpack(source)
def scrape_variables(host: Text, logs_file: IO) -> None:
    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    # br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    login_url = urlparse.urljoin(host, '/login')
    logging.info('Starting login into %s', login_url)
    response = br.open(login_url)
    br.form = next(iter(br.forms()))
    br.form['username'] = '******'
    with open('../data/secret_key.txt') as f:
        br.form['password'] = f.read()
    br.method = 'POST'
    br.submit()
    br.method = 'GET'
    logging.info('Successfully logged into %s', login_url)

    variables_url = urlparse.urljoin(host, '/monitor/variables')
    while True:
        try:
            response = br.open(variables_url)
        except urllib2.URLError as e:
            logging.error('Could not open "%s": %s', variables_url, e)
            time.sleep(59 + random.random())
            continue
        raw_vars = response.read()
        logs_file.write(raw_vars)
        logs_file.write('\n')
        # variables = json.loads(raw_vars)
        time.sleep(59 + random.random())
def _get_single_df(
    stream: IO, filetype: Optional[TypeEnum], **kwargs
) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
    """
    Read a stream and retrieve the data frame or data frame generator (chunks).

    It reads from `stream.name`, which is the path to a local file (often
    temporary), rather than from the stream itself. The stream is closed at
    the end of the method.
    """
    if filetype is None:
        filetype = TypeEnum(detect_type(stream.name))

    # Check encoding
    encoding = kwargs.get('encoding')
    if not validate_encoding(stream.name, encoding):
        encoding = detect_encoding(stream.name)
    kwargs['encoding'] = encoding

    # Check separator for CSV files if it's not set
    if filetype is TypeEnum.CSV and 'sep' not in kwargs:
        if not validate_sep(stream.name, encoding=encoding):
            kwargs['sep'] = detect_sep(stream.name, encoding)

    pd_read = getattr(pd, f'read_{filetype}')
    try:
        df = pd_read(stream.name, **kwargs)
    finally:
        stream.close()

    # In case of sheets, the df can be a dictionary
    if kwargs.get('sheet_name', NOTSET) is None:
        for sheet_name, _df in df.items():
            _df['__sheet__'] = sheet_name
        df = pd.concat(df.values(), sort=False)

    return df
def http_get(url: str, temp_file: IO) -> None:
    req = requests.get(url, stream=True)
    content_length = req.headers.get('Content-Length')
    total = int(content_length) if content_length is not None else None
    for chunk in req.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            temp_file.write(chunk)
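# A minimal usage sketch for http_get above (not part of the original source).
# It streams a download into a NamedTemporaryFile; the URL and helper name are
# hypothetical, only the tempfile module and the requests call already used
# above are assumed.
import tempfile

def _example_download(url: str = "https://example.com/data.bin") -> str:
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        http_get(url, tmp)
        return tmp.name  # path to the downloaded payload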
def run(f: t.IO, out: t.IO = sys.stdout) -> None:
    r = csv.DictReader(f)
    rows = list(r)
    w = ColorfulWriter(out, fieldnames=list(rows[0].keys()))
    w.writeheader()
    w.writerows(rows)
    out.write(RESET)
def decode(input: IO, output: IO) -> None:
    """Decode a file; input and output are binary files."""
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)
def http_get(url: str, temp_file: IO) -> None:
    req = requests.get(url, stream=True)
    content_length = req.headers.get('Content-Length')
    total = int(content_length) if content_length is not None else None
    progress = Tqdm.tqdm(unit="B", total=total)
    for chunk in req.iter_content(chunk_size=1024):
        if chunk:  # filter out keep-alive new chunks
            progress.update(len(chunk))
            temp_file.write(chunk)
    progress.close()
def _download_to_stream(self, blobname: str, stream: IO) -> bool:
    try:
        resource = self._azure_client.get_object(blobname)
    except ObjectDoesNotExistError:
        return False
    else:
        for chunk in resource.as_stream():
            stream.write(chunk)
        return True
def encode(input: IO, output: IO) -> None:
    """Encode a file; input and output are binary files."""
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE - len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)
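# A minimal round-trip sketch for encode/decode above (not part of the original
# source). MAXBINSIZE is assumed to be the usual base64-module constant (57
# bytes, so each encoded line stays under 76 characters).
import io

def _example_base64_roundtrip(payload: bytes = b"hello world") -> bytes:
    encoded = io.BytesIO()
    encode(io.BytesIO(payload), encoded)

    encoded.seek(0)
    decoded = io.BytesIO()
    decode(encoded, decoded)
    return decoded.getvalue()  # equals payload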
def pack(self, out: IO):
    """
    Write the Field to the file-like object `out`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when saving a ClassFile.

    :param out: Any file-like object providing `write()`
    """
    out.write(self.access_flags.pack())
    out.write(pack('>HH', self._name_index, self._descriptor_index))
    self.attributes.pack(out)
def pack(self, out: IO):
    """
    Write the FieldTable to the file-like object `out`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when saving a ClassFile.

    :param out: Any file-like object providing `write()`
    """
    out.write(pack('>H', len(self)))
    for field in self._table:
        field.pack(out)
def unpack(self, source: IO):
    """
    Read the Field from the file-like object `source`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when loading a ClassFile.

    :param source: Any file-like object providing `read()`
    """
    self.access_flags.unpack(source.read(2))
    self._name_index, self._descriptor_index = unpack('>HH', source.read(4))
    self.attributes.unpack(source)
def embed_file(self,
               input_file: IO,
               output_file_path: str,
               output_format: str = "all",
               batch_size: int = DEFAULT_BATCH_SIZE) -> None:
    """
    Computes ELMo embeddings from an input_file where each line contains a
    sentence tokenized by whitespace. The ELMo embeddings are written out in
    HDF5 format, where each sentence is saved in a dataset.

    Parameters
    ----------
    input_file : ``IO``, required
        A file with one tokenized sentence per line.
    output_file_path : ``str``, required
        A path to the output hdf5 file.
    output_format : ``str``, optional, (default = "all")
        The embeddings to output.  Must be one of "all", "top", or "average".
    batch_size : ``int``, optional, (default = 64)
        The number of sentences to process in ELMo at one time.
    """
    assert output_format in ["all", "top", "average"]

    # Tokenizes the sentences.
    sentences = [line.strip() for line in input_file if line.strip()]
    split_sentences = [sentence.split() for sentence in sentences]
    # Uses the sentence as the key.
    embedded_sentences = zip(sentences,
                             self.embed_sentences(split_sentences, batch_size))

    logger.info("Processing sentences.")
    with h5py.File(output_file_path, 'w') as fout:
        for key, embeddings in Tqdm.tqdm(embedded_sentences):
            if key in fout.keys():
                logger.warning(f"Key already exists in {output_file_path}, skipping: {key}")
            else:
                if output_format == "all":
                    output = embeddings
                elif output_format == "top":
                    output = embeddings[2]
                elif output_format == "average":
                    output = numpy.average(embeddings, axis=0)

                fout.create_dataset(
                    key,
                    output.shape, dtype='float32',
                    data=output
                )
    input_file.close()
def unpack(self, source: IO):
    """
    Read the ConstantPool from the file-like object `source`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when loading a ClassFile.

    :param source: Any file-like object providing `read()`
    """
    count = unpack('>H', source.read(2))[0]
    for _ in repeat(None, count):
        name_index, length = unpack('>HI', source.read(6))
        info_blob = source.read(length)
        self._table.append((name_index, info_blob))
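# A self-contained sketch (not from the original source) of the length-prefixed
# big-endian layout the pack()/unpack() methods above rely on: a two-byte '>H'
# count, then per entry a '>HI' header (index, length) followed by `length`
# bytes of payload. struct is imported with aliases to avoid shadowing the
# module-level pack/unpack used above.
import io
from struct import pack as s_pack, unpack as s_unpack

def _example_table_roundtrip():
    entries = [(1, b'Code'), (2, b'LineNumberTable')]

    buf = io.BytesIO()
    buf.write(s_pack('>H', len(entries)))
    for index, blob in entries:
        buf.write(s_pack('>HI', index, len(blob)))
        buf.write(blob)

    buf.seek(0)
    count = s_unpack('>H', buf.read(2))[0]
    decoded = []
    for _ in range(count):
        index, length = s_unpack('>HI', buf.read(6))
        decoded.append((index, buf.read(length)))
    return decoded  # equals entries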
def html_table_to_csv(input_f: IO, output_f: IO, table_num: int) -> None:
    doc = bs4.BeautifulSoup(input_f.read(), 'html5lib')
    tables = doc.find_all('table')
    try:
        table = tables[table_num]
        trows = table.find_all('tr')
        csv_writer = csv.writer(output_f)
        for trow in trows:
            cells = trow.find_all(RX_TH_OR_TD)
            csv_writer.writerow([cell.text.strip() for cell in cells])
    except IndexError:
        sys.stderr.write('ERROR: no table at index {}\n'.format(table_num))
        sys.exit(1)
def unpack(self, source: IO):
    """
    Read the FieldTable from the file-like object `source`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when loading a ClassFile.

    :param source: Any file-like object providing `read()`
    """
    field_count = unpack('>H', source.read(2))[0]
    for _ in repeat(None, field_count):
        field = Field(self._cf)
        field.unpack(source)
        self.append(field)
def _print_truncate(
    lines: Iterable,
    max_lines: int,
    outfile: IO,
) -> None:
    for i, line in enumerate(itertools.islice(lines, max_lines)):
        if i + 1 == max_lines:
            outfile.write('... (diff goes on) ...\n')
        else:
            outfile.write(line)
            if not line.endswith('\n'):
                outfile.write('<EOF>\n')
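# A small usage sketch for _print_truncate above (not from the original
# source): it caps a long diff at five printed lines, replacing the fifth with
# the truncation marker.
import sys

def _example_truncate() -> None:
    diff = [f'line {n}\n' for n in range(100)]
    _print_truncate(diff, max_lines=5, outfile=sys.stdout)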
def style(
    text: str,
    fg: typing.Optional[int] = None,
    *,
    bold: bool = False,
    file: typing.IO = sys.stdout,
) -> str:
    use_color = not os.environ.get("NO_COLOR") and file.isatty()
    if use_color:
        parts = [
            fg and f"\033[{fg}m",
            bold and f"\033[{BOLD}m",
            text,
            f"\033[{RESET_ALL}m",
        ]
        return "".join([e for e in parts if e])
    else:
        return text
def parse_html(self, fh: IO) -> Dict[str, Any]:
    '''Return head and content elements of the document.'''
    capsule = html_parser.parse(fh.read(), maybe_xhtml=True)
    doc = etree.adopt_external_document(capsule).getroot()
    result = {}
    result['head'] = doc.cssselect('head')[0]
    for candidate in ('.main-column .section', '.main__content'):
        elements = doc.cssselect(candidate)
        if elements:
            result['main_content'] = elements[0]
            break
    if 'main_content' not in result:
        raise ValueError('No main content element found')
    return result
def get_length(stream: IO) -> int:
    """Gets the number of bytes in the stream."""
    old_position = stream.tell()
    stream.seek(0)
    length = 0
    try:
        while True:
            r = stream.read(1024)
            if not r:
                break
            length += len(r)
    finally:
        stream.seek(old_position)
    return length
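# A quick usage sketch for get_length above (not part of the original source):
# the stream position is preserved, so the caller can keep reading from where
# it left off.
import io

def _example_get_length() -> None:
    stream = io.BytesIO(b'0123456789')
    stream.read(4)                    # move away from the start
    assert get_length(stream) == 10   # full size is still reported
    assert stream.tell() == 4         # position is restored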
def pack(self, out: IO):
    """
    Write the AttributeTable to the file-like object `out`.

    .. note::

        Advanced usage only. You will typically never need to call this
        method as it will be called for you when saving a ClassFile.

    :param out: Any file-like object providing `write()`
    """
    out.write(pack('>H', len(self._table)))
    for attribute in self:
        info = attribute.pack()
        out.write(pack('>HI', attribute.name.index, len(info)))
        out.write(info)
def deserialize(file: IO):
    module_logger.info("Loading FernDetector from {}".format(file.name))

    version = int(file.readline().strip())
    if version != 1:
        msg = "Can't deserialize FernDetector from {}. Incorrect version of model. Expected 1, found {}" \
            .format(file.name, version)
        module_logger.error(msg)
        raise AssertionError(msg)

    num_ferns = int(file.readline().strip())
    ph, pw = map(int, file.readline().strip().split(","))

    with Timer("Deserializing ferns"):
        ferns = [Fern.deserialize(file) for _ in range(num_ferns)]

    fern_bits, max_train, max_match = map(
        int, file.readline().strip().split(","))

    with Timer("Deserializing fern_p"):
        F, C, K = map(int, file.readline().strip().split(","))
        fern_p = np.zeros((F, C, K), dtype=float)
        for fern_idx in range(F):
            for class_idx in range(C):
                line = list(map(float, file.readline().strip().split(",")))
                fern_p[fern_idx, class_idx, :] = line

    line = file.readline().strip().split(",")
    key_points = list(grouper(map(int, line), 2))

    module_logger.debug("Creating FernDetector")
    detector = FernDetector(patch_size=(ph, pw),
                            max_train_corners=max_train,
                            max_match_corners=max_match,
                            ferns=ferns,
                            ferns_p=fern_p,
                            classes_cnt=C,
                            key_points=key_points,
                            fern_bits=fern_bits)

    return detector
def _write_top_defaults(openff_sys: "Interchange", top_file: IO):
    """Write [ defaults ] section"""
    top_file.write("[ defaults ]\n")
    top_file.write("; nbfunc\tcomb-rule\tgen-pairs\tfudgeLJ\tfudgeQQ\n")
    if "vdW" in openff_sys.handlers:
        nbfunc = 1
        scale_lj = openff_sys["vdW"].scale_14
        gen_pairs = "no"
        handler_key = "vdW"
    elif "Buckingham-6" in openff_sys.handlers:
        nbfunc = 2
        gen_pairs = "no"
        scale_lj = openff_sys["Buckingham-6"].scale_14
        handler_key = "Buckingham-6"
    else:
        raise UnsupportedExportError(
            "Could not find a handler for short-ranged vdW interactions that is compatible "
            "with GROMACS. Looked for handlers named `vdW` and `Buckingham-6`."
        )

    mixing_rule = openff_sys[handler_key].mixing_rule
    if mixing_rule == "lorentz-berthelot":
        comb_rule = 2
    elif mixing_rule == "geometric":
        comb_rule = 3
    elif mixing_rule == "buckingham" and handler_key == "Buckingham-6":
        # TODO: Not clear what the compatibility is here. `comb-rule` only applies to LJ terms.
        # The documentation lists the combination rule for Buckingham potentials, but it does not
        # seem like GROMACS will do this automatically, and it needs to be implemented manually via
        # [ nonbond_params ].
        # https://manual.gromacs.org/current/reference-manual/topologies/parameter-files.html#non-bonded-parameters
        # https://gromacs.bioexcel.eu/t/how-to-use-buckingham-function/1181/4
        comb_rule = 2
    else:
        raise UnsupportedExportError(
            f"Mixing rule `{mixing_rule}` is not compatible with GROMACS and/or not supported "
            "by the current exporter. Supported values are `lorentz-berthelot` and `geometric`."
        )

    top_file.write(
        "{:6d}\t{:6d}\t{:6s} {:8.6f} {:8.6f}\n\n".format(
            nbfunc,
            comb_rule,
            gen_pairs,
            scale_lj,
            openff_sys.handlers["Electrostatics"].scale_14,
        )
    )
def push_file(self, *, source: IO, destination: str, bufsize: int = 1024) -> None:
    """Passthrough for pushing a file through `multipass transfer`.

    :param IO source: a file-like object to read from
    :param str destination: the destination of the copied file, using syntax
                            expected by multipass
    """
    assert isinstance(source, io.IOBase)

    # can't use std{in,out}=open(...) due to LP#1849753
    p = _popen(
        [self.provider_cmd, "transfer", "-", destination],
        stdin=subprocess.PIPE,
    )

    while True:
        read = source.read(bufsize)
        if read:
            p.stdin.write(read)
        if len(read) < bufsize:
            logger.debug("Finished streaming source file")
            break

    while True:
        try:
            out, err = p.communicate(timeout=1)
        except subprocess.TimeoutExpired:
            pass
        else:
            if p.returncode == 0:
                logger.debug("Process completed")
                break
            elif p.returncode is not None:
                raise errors.ProviderFileCopyError(
                    provider_name=self.provider_name,
                    exit_code=p.returncode)
def save_historical_prices(tmp_file: IO) -> None:
    """Read historical data from a file and save it to the database.

    :param tmp_file: temporary file from which the data will be read and persisted
    """
    for chunk in chunked(tmp_file.readlines(), Config.CHUNK_SIZE):
        historical_prices = [json.loads(line) for line in chunk]
        symbols: Set[str] = set(
            historical_price['symbol'] for historical_price in historical_prices)
        Ticker.insert_tickers(symbols)
        symbol_to_uuid: Dict[str, UUID] = Ticker.get_uuids_by_symbol()
        HistoricalPrice.bulk_insert([
            dict(
                ticker_id=symbol_to_uuid[historical_price['symbol']],
                **{k: v for k, v in historical_price.items() if k != 'symbol'})
            for historical_price in historical_prices
        ])
        session.commit()
def __init__(self, f: IO):
    super().__init__()

    #: Key Offset (relative to key_table_offset)
    self.key_offset: int = read_u16(f, Endianess.LITTLE_ENDIAN)
    self.logger.debug(f'Key Offset: {self.key_offset}')

    #: Data Type
    self.data_type: DataType = DataType(f.read(2))
    self.logger.debug(f'Data Type: {self.data_type}')

    #: Data Length (used bytes)
    self.data_length: int = read_u32(f, Endianess.LITTLE_ENDIAN)
    self.logger.debug(f'Data Length: {self.data_length}')

    #: Data Max Length
    self.data_max_length: int = read_u32(f, Endianess.LITTLE_ENDIAN)
    self.logger.debug(f'Data Max Length: {self.data_max_length}')

    #: Data Offset (relative to data_table_offset)
    self.data_offset: int = read_u32(f, Endianess.LITTLE_ENDIAN)
    self.logger.debug(f'Data Offset: {self.data_offset}')
def construct(self, fp: IO):
    abb_namer = get_unique_letterer()
    abb_names = {}
    for level, line in enumerate(fp.read().strip().split('\n'), start=1):
        floor = Floor()
        self.floors.append(floor)
        contents = (
            line.replace(',', '')
                .replace('.', '')
                .replace(' and ', ' ')
                .split(' ')[4:]
        )
        for parts in grouper(contents, 3):
            if parts[0] == 'nothing':
                break
            if parts[2] == 'generator':
                name = parts[1]
                abb_names[name] = abb_names.get(name, abb_namer(name))
                floor.add(Generator(name, abb_names[name]))
            if parts[2] == 'microchip':
                name = parts[1].rsplit('-', 1)[0]
                abb_names[name] = abb_names.get(name, abb_namer(name))
                floor.add(Chip(name, abb_names[name]))
def pprintjson(
    *obj: Union[Dict, List],
    indent: int = 4,
    end: str = "\n",
    file: IO = None,
    flush: bool = False,
) -> None:
    """
    :param *obj: Union[Dict, List]:
    :param indent: int: (Default value = 4)
    :param end: str: (Default value = "\n")
    :param file: IO: (Default value = None)
    :param flush: bool: (Default value = False)
    """
    file = stdout if file is None else file
    json = [dumps(o, indent=indent) for o in obj]
    try:
        if file.isatty():
            json = [highlight(j, JsonLexer(), TerminalFormatter()) for j in json]
    except AttributeError:
        pass
    print(*json, end=end, file=file, flush=flush)
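# A short usage sketch for pprintjson above (not part of the original source).
# When `file` is not a TTY (e.g. a StringIO), the highlighting branch is
# skipped and plain indented JSON is written.
from io import StringIO

def _example_pprintjson() -> str:
    buf = StringIO()
    pprintjson({"name": "ada", "langs": ["python", "java"]}, indent=2, file=buf)
    return buf.getvalue()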
def main(input_file: IO):
    # Input Parsing
    input_lines = input_file.read().split('\n')
    adjacency_map = defaultdict(list)
    for y, row in enumerate(input_lines):
        for x, seat in enumerate(row):
            adjacency_map[(x, y)] = get_adjacent_coords(
                x, y, len(row) - 1, len(input_lines) - 1)

    previous = []
    current = input_lines
    result = 0
    while True:
        previous = current
        current = run_model(current, adjacency_map)
        if (joined := ''.join(current)) == ''.join(previous):
            result = joined.count(TAKEN)
            break
def parse_html(self, fh: IO) -> Dict[str, Any]:
    '''Return head and content elements of the document.'''
    capsule = html_parser.parse(fh.read(), maybe_xhtml=True)
    doc = etree.adopt_external_document(capsule).getroot()

    # Remove <style> tags
    for style in list(doc.iter("style")):
        style.getparent().remove(style)

    result = {}
    result['head'] = doc.cssselect('head')[0]
    for candidate in ('.main-column .section', '.main-column section', '.main__content'):
        elements = doc.cssselect(candidate)
        if elements:
            result['main_content'] = elements[0]
            break
    if 'main_content' not in result:
        raise ValueError('No main content element found')
    return result
def _write_angle_coeffs(lmp_file: IO, openff_sys: Interchange):
    """Write the Angle Coeffs section of a LAMMPS data file"""
    lmp_file.write("\nAngle Coeffs\n\n")

    angle_handler = openff_sys.handlers["Angles"]
    angle_type_map = dict(enumerate(angle_handler.potentials))

    for angle_type_idx, smirks in angle_type_map.items():
        params = angle_handler.potentials[smirks].parameters

        k = params["k"].to(unit.Unit("kilocalorie / mole / radian ** 2")).magnitude
        k = k * 0.5  # Account for LAMMPS wrapping 1/2 into k
        theta = params["angle"].to(unit.degree).magnitude

        lmp_file.write(f"{angle_type_idx+1:d} harmonic\t{k:.16g}\t{theta:.16g}\n")

    lmp_file.write("\n")
def _write_bond_coeffs(lmp_file: IO, openff_sys: Interchange):
    """Write the Bond Coeffs section of a LAMMPS data file"""
    lmp_file.write("Bond Coeffs\n\n")

    bond_handler = openff_sys.handlers["Bonds"]
    bond_type_map = dict(enumerate(bond_handler.potentials))

    for bond_type_idx, smirks in bond_type_map.items():
        params = bond_handler.potentials[smirks].parameters

        k = params["k"].to(unit.Unit("kilocalorie / mole / angstrom ** 2")).magnitude
        k = k * 0.5  # Account for LAMMPS wrapping 1/2 into k
        length = params["length"].to(unit.angstrom).magnitude

        lmp_file.write(f"{bond_type_idx+1:d} harmonic\t{k:.16g}\t{length:.16g}\n")

    lmp_file.write("\n")
def _print(self, stream: IO, message: str, **kwargs: Any) -> None:
    if None in (stream, message):
        return

    stream_tty = stream.isatty()
    print_tty = kwargs.pop('tty', True) if self._tty else kwargs.pop('tty', False)
    print_notty = kwargs.pop('notty', True) if self._notty else kwargs.pop('notty', False)

    if (stream_tty and print_tty) or (not stream_tty and print_notty):
        prefix = None
        if kwargs.pop('prefix', self._prefix):
            if callable(self._prefix):
                prefix = self._prefix()
            elif self._prefix:
                prefix = str(self._prefix)
        message = f'{prefix} {message}' if prefix else message

        if not stream.isatty() or not kwargs.pop('colors_enabled', self._colors_enabled):
            message = self.strip_style(message)
        else:
            style_args = {
                k: v for (k, v) in kwargs.items() if k in _style_keys
            }
            if len(style_args) > 0:
                message = self.style(message, **style_args)

        stream.write(message)
        endl = kwargs.pop('endl', self._endl)
        stream.write(endl)
        stream.flush()
def le(self, arq: IO):
    def converte_tabela_em_df() -> pd.DataFrame:
        df = pd.DataFrame(tabela)
        cols = ["Inicial"] + [f"Estágio {s}" for s in range(1, n_semanas + 1)]
        df.columns = cols
        df["Usina"] = usinas
        df["Número"] = numeros
        df = df[["Número", "Usina"] + cols]
        return df

    # Skip two lines
    arq.readline()
    arq.readline()
    # Find out the number of weeks
    linha = arq.readline()
    sems = [
        s for s in linha.split(" ")
        if (len(s) > 0 and ("Sem" in s or "Mes" in s))
    ]
    reg_usina = RegistroAn(12)
    reg_numero = RegistroIn(4)
    reg_vol = RegistroFn(6)
    n_semanas = len(sems)
    usinas: List[str] = []
    numeros: List[int] = []
    tabela = np.zeros((300, n_semanas + 1))
    # Skip another line
    arq.readline()
    i = 0
    while True:
        # Check whether the reading is finished
        linha = arq.readline()
        if "X-------X" in linha:
            tabela = tabela[:i, :]
            self._dados = converte_tabela_em_df()
            break
        # Otherwise, read one more line
        # Subsystem and REE
        numero = reg_numero.le_registro(linha, 4)
        usina = reg_usina.le_registro(linha, 9)
        numeros.append(numero)
        usinas.append(usina)
        # Weeks
        tabela[i, :] = reg_vol.le_linha_tabela(linha, 23, 1, n_semanas + 1)
        i += 1
def solve(input_file: typing.IO) -> typing.Generator[str, None, None]:
    start_square = int(input_file.readline().strip())

    # PART 1
    radius = 0
    while (2 * radius + 1)**2 < start_square:
        radius += 1
    square = (2 * radius + 1)**2

    # Move clockwise around the grid to find the value at the appropriate radius
    pos = (radius, radius)
    while square != start_square:
        if pos[0] > -radius and pos[1] == radius:
            pos = (pos[0] - 1, pos[1])
        elif pos[0] == -radius:
            pos = (pos[0], pos[1] - 1)
        elif pos[1] == -radius:
            pos = (pos[0] + 1, pos[1])
        else:
            pos = (pos[0], pos[1] + 1)
        square -= 1

    yield str(sum([abs(dim) for dim in pos]))

    # PART 2
    grid = {(0, 0): 1}
    pos = (0, 0)
    dirs = ((1, 0), (0, -1), (-1, 0), (0, 1))
    dir_idx = 0
    while grid[pos] <= start_square:
        pos = move(pos, dirs[dir_idx])
        grid[pos] = adjacent_sum(grid, pos)  # wrote grid[]

        # If we can turn left, do so
        left_idx = (dir_idx + 1) % len(dirs)
        if move(pos, dirs[left_idx]) not in grid:
            dir_idx = left_idx

    yield str(grid[pos])
def initialize(self, log: IO, missions: MassacreMissions, initialized: bool):
    self.check_process()
    # have to store the current log in RAM
    # TODO: possibly avoid this way?
    events = log.readlines()
    if not events:
        raise RuntimeError
    ln = 0
    # find the latest login event for existing missions
    for line in reversed(events):
        ln += 1
        if '"event":"Missions"' in line:
            current_missions = json.loads(line)
            self.log_time = parse(current_missions['timestamp']).timestamp()
            # grab all mission ID's
            self.find_resumed_missions(current_missions)
            # find mission details in old journals
            self.find_mission_details(missions)
            # mark initialized as done
            initialized = True
            break
    if not initialized:
        raise RuntimeError
    elif self.label_texts.current_log_status.get() == "Waiting for log file update":
        self.label_texts.current_log_status.set(
            "Current log file: " +
            os.path.relpath(self.current_log_name, self.log_path))
    # every entry before the restarting will be useless
    cut = len(events) - ln + 1
    # check possible mission events before resume in current log
    self.read_event(events[:cut - 1], missions, False)
    # check all new mission and bounty events
    self.read_event(events[cut:], missions, initialized)
    # assign values to the labels
    return initialized
def paste(file: IO, syntax: str, expires: int, title: str, raw: bool, copy: bool) -> None:
    """ Paste to dpaste.com """
    try:
        with open(CONF_PATH, "r") as conf_file:
            options = json.load(conf_file)
    except FileNotFoundError:
        options = _create_default_config()

    content = file.read()
    r = requests.post(
        "http://dpaste.com/api/v2/",
        data={
            "title": title,
            "content": content,
            "syntax": (syntax or options.get("syntax") or get_syntax(file.name, content)),
            "expiry_days": expires or options.get("expires"),
        },
    )
    r.raise_for_status()
    url: str = r.text.strip()
    if raw or options.get("raw"):
        url += ".txt"

    click.echo(url)
    if copy or options["autocp"]:
        pyperclip.copy(url)
def parse_winner_mapping(file: IO) -> Dict:
    lines = file.readlines()
    lines = list(map(lambda l: l.replace('\n', ''), lines))

    header = filter(lambda l: l.startswith('$'), lines)
    header = dict(map(lambda l: (l[1:].split(' ')[0], l.split(' ')[1]), header))

    mapping = {}
    mappings = list(filter(lambda l: not l.startswith('$'), lines))
    mappings = [(mappings[i], mappings[i + 1]) for i in range(0, len(mappings) - 1, 2)]
    for vec, winners in mappings:
        matchings = []
        winner_info = winners.split(' ')
        for i in range(0, len(winner_info) - 1, 3):
            x = int(winner_info[i])
            y = int(winner_info[i + 1])
            distance = float(winner_info[i + 2])
            matchings.append((x, y, distance))
        mapping[vec] = matchings

    header['MAPPING'] = mapping
    return header
def paste_to_dpaste(config: Dict[str, Any], file: IO, syntax: str, expires: int,
                    title: str, raw: bool, copy: bool) -> str:
    content = file.read()
    r = requests.post(
        'http://dpaste.com/api/v2/',
        data={
            'title': title,
            'content': content,
            'syntax': syntax or config.get('syntax') or get_syntax(file.name, content),
            'expiry_days': expires or config.get('expires'),
        },
    )
    r.raise_for_status()
    url: str = r.text.strip()
    if raw or config.get('raw'):
        url += '.txt'
    return url
def parse_map(map_file: IO) -> Dict[str, Object]:
    objects = {"COM": Object("COM", None, set(), 0)}
    for line in map_file.readlines():
        parent_id, child_id = line.strip().split(")", 1)

        if parent_id not in objects:
            parent = Object(parent_id, None, set())
            objects[parent.id] = parent
        else:
            parent = objects[parent_id]

        if child_id in objects:
            child = objects[child_id]
        else:
            child = Object(child_id, parent, set())

        child.parent = parent
        if parent.orbit_count is not None:
            child.orbit_count = parent.orbit_count + 1
        parent.children.add(child)
        objects[child.id] = child

    calculate_object_orbits(objects["COM"])
    return objects
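# A small usage sketch for parse_map above (not from the original source). It
# assumes the Object class and calculate_object_orbits defined elsewhere in
# this module; the input uses the `parent)child` orbit-map format the parser
# expects.
import io

def _example_parse_map():
    orbit_map = io.StringIO("COM)B\nB)C\nC)D\n")
    objects = parse_map(orbit_map)
    return objects["D"].orbit_count  # expected: 3 hops back to COM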
def publish_raw_request(deployment_file: IO, request_file: IO):
    """Request a new layer in every region in DEPLOYMENT_FILE.

    The Layer must be described in the Accretion format in REQUEST_FILE.

    .. code:: json

        {
            "Name": "layer name",
            "Language": "Language to target",
            "Requirements": {
                "Type": "accretion",
                "Requirements": [
                    {
                        "Name": "Requirement Name",
                        "Details": "Requirement version or other identifying details"
                    }
                ]
            },
            "Requirements": {
                "Type": "requirements.txt",
                "Requirements": "Raw contents of requirements.txt file format"
            }
        }

    .. note::

        Language must be a valid
        `runtime prefix <https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html>`_
        (ex: "python", "java", etc).
    """
    record = DeploymentFile.from_dict(json.load(deployment_file))

    request = request_file.read()
    # TODO: Validate the request

    _publish_to_all_regions(record=record, request=request)
def main(input_file: IO):
    raw_input = input_file.read().split('\n\n')
    rules, my_ticket, nearby_tickets = raw_input
    rule_map = parse_rules(rules)
    my_ticket = [int(n) for n in my_ticket.split('\n')[1].split(',')]
    tickets = [row.split(',') for row in nearby_tickets.split('\n')][1:]

    possible_fields = [set()] * len(tickets[0])
    for ticket in tickets:
        for idx, num in enumerate(ticket):
            fields = set()
            for name, rule in rule_map.items():
                for from_num, to_num in rule:
                    if from_num <= int(num) <= to_num:
                        fields.add(name)
            if fields:
                possible_fields[idx] = \
                    possible_fields[idx].intersection(fields) \
                    if possible_fields[idx] else fields

    sorted_possible_fields = [[len(fields), idx, fields]
                              for idx, fields in enumerate(possible_fields)]
    sorted_possible_fields.sort()

    visited = set()
    ans = 1
    for idx, data in enumerate(sorted_possible_fields):
        length, index, fields = data
        field_name = list(fields - visited)[0]
        if 'departure' in field_name:
            ans *= my_ticket[index]
        visited = visited.union(fields)

    print('The answer for Day 16 Part B :', ans)
def unlock(file_: typing.IO):
    try:
        savepos = file_.tell()
        if savepos:
            file_.seek(0)
        try:
            msvcrt.locking(file_.fileno(), constants.LockFlags.UNBLOCK,
                           lock_length)
        except IOError as exc:
            exception = exc
            if exc.strerror == 'Permission denied':
                hfile = win32file._get_osfhandle(file_.fileno())
                try:
                    win32file.UnlockFileEx(hfile, 0, -0x10000, __overlapped)
                except pywintypes.error as exc:
                    exception = exc
                    if exc.winerror == winerror.ERROR_NOT_LOCKED:
                        # error: (158, 'UnlockFileEx',
                        #         'The segment is already unlocked.')
                        # To match the 'posix' implementation, silently
                        # ignore this error
                        pass
                    else:
                        # Q: Are there exceptions/codes we should be
                        # dealing with here?
                        raise
            else:
                raise exceptions.LockException(
                    exceptions.LockException.LOCK_FAILED,
                    exception.strerror,
                    fh=file_)
        finally:
            if savepos:
                file_.seek(savepos)
    except IOError as exc:
        raise exceptions.LockException(
            exceptions.LockException.LOCK_FAILED, exc.strerror, fh=file_)
def download_component(
    logger,
    github_repo: github3.repos.repo.Repository,
    path_filter_func: typing.Callable,
    ref: str,
    target: typing.IO,
):
    url = github_repo._build_url(
        'tarball',
        ref,
        base_url=github_repo._api,
    )

    files_to_scan = 0
    filtered_out_files = 0

    with tarfile.open(fileobj=target, mode='w|') as tar_out, \
            github_repo._get(url, allow_redirects=True, stream=True) as res, \
            tarfile.open(fileobj=res.raw, mode='r|*') as src:
        res.raise_for_status()
        # valid because first tar entry is root directory and has no trailing '/'
        component_filename = src.next().name
        path_offset = len(component_filename) + 1

        for tar_info in src:
            if path_filter_func(tar_info.name[path_offset:]):
                tar_out.addfile(tarinfo=tar_info, fileobj=src.fileobj)
                files_to_scan += 1
            else:
                filtered_out_files += 1

    logger.info(f'{files_to_scan=}, {filtered_out_files=}')

    tar_out_size = target.tell()
    return tar_out_size
def pipe(self, sink: IO, text_mode: bool = False) -> IO:
    """
    Pipes the data from the current stream object into any file-like object.

    Args:
        sink (IO): Any file-like object or AbcStream object.
        text_mode (bool, optional): If True, writes string to sink rather than
            bytes. Defaults to False.

    Raises:
        ValueError: sink for pipe must be a filelike object - i.e. has write method

    Returns:
        IO: file-like object (i.e. sink) that is piped into.
    """
    if hasattr(sink, "write") and callable(sink.write):  # type: ignore
        # if empty, don't decode
        encoding = self.encoding or "utf-8"
        # update the encoding of the AbcStream to be same as source
        if hasattr(sink, "set_encoding") and encoding:
            sink.set_encoding(encoding)  # type: ignore

        self._pipes.append((encoding if text_mode else "", sink))
        # remember current pos
        pos = self.tell()
        # go to end of stream
        self._file.seek(0, 2)
        end = self.tell()
        # have some content
        if end > 0:
            # go to start
            self._file.seek(0)
            # stream existing to sink
            if text_mode:
                for line in iter(self._file.readline, b""):
                    sink.write(line.decode(encoding))
            else:
                for chunk in self._file:
                    sink.write(chunk)
            # go back to original position
            self.seek(pos)
        return sink

    raise ValueError(
        "sink for pipe must be a filelike object - i.e. has write method")
def le(self, arq: IO):
    def converte_tabela_em_df() -> pd.DataFrame:
        df = pd.DataFrame(tabela)
        cols = [f"Estágio {s}" for s in range(1, n_semanas + 1)]
        df.columns = cols
        df["Subsistema"] = subsistemas
        df["Patamar"] = patamares
        df = df[["Subsistema", "Patamar"] + cols]
        return df

    # Skip a line
    arq.readline()
    # Find out the number of weeks
    linha = arq.readline()
    sems = [
        s for s in linha.split(" ")
        if (len(s) > 0 and ("Sem" in s or "Mes" in s))
    ]
    reg_pat = RegistroAn(6)
    reg_cmo = RegistroFn(10)
    n_semanas = len(sems)
    subsistemas: List[str] = []
    patamares: List[str] = []
    tabela = np.zeros((4 * len(SUBSISTEMAS), n_semanas))
    # Skip another line
    arq.readline()
    i = 0
    while True:
        # Check whether the reading is finished
        linha = arq.readline()
        if "X------X" in linha:
            self._dados = converte_tabela_em_df()
            break
        # Otherwise, read one more line
        # Subsystem and load level
        ssis = SUBSISTEMAS[int(i / 4)]
        str_pat = reg_pat.le_registro(linha, 4)
        pat = "Médio" if "Med" in str_pat else str_pat.split("_")[1]
        subsistemas.append(ssis)
        patamares.append(pat)
        # Weeks
        tabela[i, :] = reg_cmo.le_linha_tabela(linha, 11, 1, n_semanas)
        i += 1
def save_to_fh(self, fh: IO, save_if_non_dirty_too: bool = False) -> bool:
    result = False
    can_save = self._dirty
    if not self._dirty and save_if_non_dirty_too:
        can_save = True

    if can_save:
        # 1st row is JSON object with data for DirHash
        fh.write('{json}\n'.format(json=self.to_json()))
        fh.write('\n')
        # all the FileHash objects
        for file_hash in self._cache.values():
            fh.write('{json}\n'.format(json=file_hash.to_json()))
        result = True

    return result
def transpile_class_constant_initialization(self, class_context: ClassContext, writer: IO):
    if class_context.cls.has_constants:
        writer.write(f"""\
    if (0 == {class_context.constants_initialized_identifier}) {{
""")

        for const in class_context.cls.constants():
            writer.write(f"""\
        {{
            uint8_t data[] = {{ {','.join(map(lambda b: str(b), const.data))} }};
            int bytesRead;
            gd2c10->variant_decode({class_context.address_of_constant(const.name)}, data, {len(const.data)}, &bytesRead, true);
        }}
""")

        writer.write(f"""\
        {class_context.constants_initialized_identifier} = 1;
    }}
""")
def add_transactions(f: typing.IO, transactions: List[Transaction],
                     take_home: int, config: Dict):
    """Generate SankeyMatic strings from filtered transactions

    Args:
        f: output file
        transactions: list of all transactions
        take_home: total take home pay for the period
        config: config file
    """
    start_date = datetime.strptime(config['time']['start_date'], '%m/%d/%Y')
    end_date = datetime.strptime(config['time']['end_date'], '%m/%d/%Y')
    filt_trans = filter_transactions(
        transactions=transactions,
        start_date=start_date,
        end_date=end_date,
        vendors=config['transactions']['ignore_vendors'],
        categories=config['transactions']['ignore_categories'],
        ignore=True,
        use_labels=config['transactions']['prefer_labels'])
    summed_categories = summarize_transactions(
        transactions=filt_trans,
        use_labels=config['transactions']['prefer_labels'],
        threshold=config['transactions']['category_threshold'])

    expenditure = 0
    sorted_cat = sorted(summed_categories.items(), key=lambda kv: kv[1])
    sorted_cat.reverse()
    for name, value in sorted_cat:
        if config['transactions']['use_percentages']:
            f.write(f'Take Home [{int(100 * value / take_home)}] {name}\n')
        else:
            f.write(f'Take Home [{value}] {name}\n')
        expenditure += value

    if config['transactions']['use_percentages']:
        savings = int(100 * (take_home - expenditure) / take_home)
    else:
        savings = take_home - expenditure
    f.write(f'Take Home [{savings}] Savings\n')
def create_proxysg_all_category_out_format(indicators_file: IO, files_by_category: dict):
    """Write all indicators to a file in proxysg format.

    Args:
        indicators_file (IO): the output file the indicators are written to.
        files_by_category (dict): all indicators by category

    Returns:
        a file in proxysg format.
    """
    for category, category_file in files_by_category.items():
        indicators_file.write(f"define category {category}\n")
        category_file.seek(0)
        indicators_file.write(category_file.read())
        category_file.close()
        indicators_file.write("end\n")

    return indicators_file
def stuff(a: IO) -> AnyStr:
    return a.readline()
def iter_ceph_ops(fd: IO):
    data = fd.read()
    offset = 0
    while offset < len(data):
        op, offset = CephOp.unpack(data, offset)
        yield op
def embed_file(self,
               input_file: IO,
               output_file_path: str,
               output_format: str = "all",
               batch_size: int = DEFAULT_BATCH_SIZE,
               forget_sentences: bool = False,
               use_sentence_keys: bool = False) -> None:
    """
    Computes ELMo embeddings from an input_file where each line contains a
    sentence tokenized by whitespace. The ELMo embeddings are written out in
    HDF5 format, where each sentence embedding is saved in a dataset with the
    line number in the original file as the key.

    Parameters
    ----------
    input_file : ``IO``, required
        A file with one tokenized sentence per line.
    output_file_path : ``str``, required
        A path to the output hdf5 file.
    output_format : ``str``, optional, (default = "all")
        The embeddings to output.  Must be one of "all", "top", or "average".
    batch_size : ``int``, optional, (default = 64)
        The number of sentences to process in ELMo at one time.
    forget_sentences : ``bool``, optional, (default = False).
        If use_sentence_keys is False, whether or not to include a string
        serialized JSON dictionary that associates sentences with their line
        number (its HDF5 key). The mapping is placed in the
        "sentence_to_index" HDF5 key. This is useful if you want to use the
        embeddings without keeping the original file of sentences around.
    use_sentence_keys : ``bool``, optional, (default = False).
        Whether or not to use full sentences as keys. By default, the line
        numbers of the input file are used as ids, which is more robust.
    """
    assert output_format in ["all", "top", "average"]

    # Tokenizes the sentences.
    sentences = [line.strip() for line in input_file]

    blank_lines = [i for (i, line) in enumerate(sentences) if line == ""]
    if blank_lines:
        raise ConfigurationError(f"Your input file contains empty lines at indexes "
                                 f"{blank_lines}. Please remove them.")
    split_sentences = [sentence.split() for sentence in sentences]
    # Uses the sentence index as the key.

    if use_sentence_keys:
        logger.warning("Using sentences as keys can fail if sentences "
                       "contain forward slashes or colons. Use with caution.")
        embedded_sentences = zip(sentences,
                                 self.embed_sentences(split_sentences, batch_size))
    else:
        embedded_sentences = ((str(i), x) for i, x in
                              enumerate(self.embed_sentences(split_sentences, batch_size)))

    sentence_to_index = {}
    logger.info("Processing sentences.")
    with h5py.File(output_file_path, 'w') as fout:
        for key, embeddings in Tqdm.tqdm(embedded_sentences):
            if use_sentence_keys and key in fout.keys():
                raise ConfigurationError(f"Key already exists in {output_file_path}. "
                                         f"To encode duplicate sentences, do not pass "
                                         f"the --use-sentence-keys flag.")

            if not forget_sentences and not use_sentence_keys:
                sentence = sentences[int(key)]
                sentence_to_index[sentence] = key

            if output_format == "all":
                output = embeddings
            elif output_format == "top":
                output = embeddings[-1]
            elif output_format == "average":
                output = numpy.average(embeddings, axis=0)

            fout.create_dataset(
                str(key),
                output.shape, dtype='float32',
                data=output
            )
        if not forget_sentences and not use_sentence_keys:
            sentence_index_dataset = fout.create_dataset(
                "sentence_to_index",
                (1,),
                dtype=h5py.special_dtype(vlen=str))
            sentence_index_dataset[0] = json.dumps(sentence_to_index)

    input_file.close()
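# A hedged sketch (not part of the original source) of reading back the HDF5
# file produced by embed_file above, assuming line numbers were used as keys
# and the "sentence_to_index" mapping was written. The path is hypothetical.
import json
import h5py

def _example_read_embeddings(path: str = "elmo_embeddings.hdf5"):
    with h5py.File(path, 'r') as fin:
        sentence_to_index = json.loads(fin["sentence_to_index"][0])
        first_key = next(iter(sentence_to_index.values()))
        return fin[first_key][()]  # numpy array for that sentence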
def write_echo_json(f: IO, obj: object) -> None:
    f.write("echo %s\n" % shlex.quote(json.dumps(obj)))
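# A brief usage sketch for write_echo_json above (not from the original
# source): it emits a shell-safe `echo` line for the JSON-serialized object.
import io

def _example_write_echo_json() -> str:
    buf = io.StringIO()
    write_echo_json(buf, {"status": "ok", "count": 3})
    return buf.getvalue()  # echo '{"status": "ok", "count": 3}'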