def _generate_one_batch(self, generator: 'models.GeneratorModel', batcher: Optional[batchers.Batcher] = None, src_batch: batchers.Batch = None, max_src_len: Optional[int] = None, fp: TextIO = None): """ Generate outputs for a single batch and write them to the output file. """ batch_size = len(src_batch) src_batches = batcher.pack(src_batch, None) src_batch = src_batches[0] src_len = src_batch.sent_len() if max_src_len is not None and src_len > max_src_len: output_txt = "\n".join([NO_DECODING_ATTEMPTED] * batch_size) fp.write(f"{output_txt}\n") else: with utils.ReportOnException({ "src": src_batch, "graph": utils.print_cg_conditional }): tt.reset_graph() with torch.no_grad( ) if xnmt.backend_torch else utils.dummy_context_mgr(): outputs = self.generate_one(generator, src_batch) if self.reporter: self._create_sent_report() for i in range(len(outputs)): output_txt = outputs[i].sent_str( custom_output_procs=self.post_processor) fp.write(f"{output_txt}\n")
def del_with_ul(tag: Element, file: TextIO, space_num=0, prefix=None):
    """
    Convert a <ul> element into a Markdown bullet list.

    :param tag: element containing the <ul>
    :param file: output file
    :param space_num: number of leading spaces controlling indentation of nested lists
    :param prefix: optional prefix written before each bullet (e.g. ">" inside a blockquote)
    """
    tag = etree.HTML(
        etree.tostring(tag, encoding="utf-8", pretty_print=True,
                       method="html").decode())
    li = tag.xpath("/html/body/ul/li")
    # No <li> children: nothing to write.
    if not li:
        return
    for k in li:
        tem_ = k.xpath("./text()")
        if tem_:
            if not prefix:
                line = " " * space_num + "* " + tem_[0].replace("\n", "") + "\n"
            else:
                line = prefix + " " + " " * space_num + "* " + tem_[
                    0].replace("\n", "") + "\n"
            file.write(line)
        # Recurse into nested lists one indentation level deeper.
        tem = k.xpath("./ul")
        if tem is not None and len(tem) > 0:
            del_with_ul(tem[0], space_num=space_num + 1, file=file)
def del_with_code_block(code_type: str, code: str, file: TextIO):
    # Markup-like languages arrive with HTML-escaped angle brackets; unescape them
    # before wrapping the snippet in a fenced code block.
    other = ["vue", "xml", "html", "java"]
    code_ = code
    if code_type in other:
        code_ = code.replace("&lt;", "<").replace("&gt;", ">")
    line = "```" + code_type + "\n" + code_ + "\n```\n"
    file.write(line)
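# A minimal usage sketch (file name and snippet are assumptions, not from the original
# source): del_with_code_block takes a language tag, the raw code text and an open text
# file, and emits a fenced code block, unescaping HTML entities for markup-like languages.
with open("notes.md", "w", encoding="utf-8") as out:
    del_with_code_block("java", "List&lt;String&gt; names = new ArrayList<>();", out)
    # notes.md now contains a ```java fence around "List<String> names = new ArrayList<>();"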
def _generate_one_batch(self, generator: 'models.GeneratorModel', batcher: Optional[batchers.Batcher] = None, src_batch: batchers.Batch = None, max_src_len: Optional[int] = None, fp: TextIO = None): """ Generate outputs for a single batch and write them to the output file. """ batch_size = len(src_batch) src_batches = batcher.pack(src_batch, None) src_batch = src_batches[0] src_len = src_batch.sent_len() if max_src_len is not None and src_len > max_src_len: output_txt = "\n".join([NO_DECODING_ATTEMPTED] * batch_size) fp.write(f"{output_txt}\n") else: with utils.ReportOnException({ "src": src_batch, "graph": utils.print_cg_conditional }): dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE, check_validity=settings.CHECK_VALIDITY) outputs = self.generate_one(generator, src_batch) if self.reporter: self._create_sent_report() for i in range(len(outputs)): output_txt = outputs[i].sent_str( custom_output_procs=self.post_processor) fp.write(f"{output_txt}\n")
def write_record_child(f: TextIO, field: bigquery.SchemaField, prefix: str): f.write(f'view: {prefix}{field.name} {{\n') for fld in field.fields: write_field(f, fld) f.write('}\n\n') for fld in filter(lambda x: x.field_type == 'RECORD', field.fields): write_record_child(f, fld, f'{prefix}{field.name}__')
def del_with_h(line: str, level: int, file: TextIO):
    """Write ``line`` as a Markdown heading of the given level."""
    line = "#" * level + " " + line + "\n"
    file.write(line)
def save_to_file(self, txt_file: TextIO):
    """
    Write the data held in self to ``txt_file``, a handle opened with
    open(..., 'w'), then close the handle.
    """
    for row_object in self.row_dict.values():
        txt_file.write(row_object.tabulate())
    txt_file.close()
    logging.debug(
        f'{type(self).__name__} object successfully saved to file')
def handle_packet(data: T.Union[str, bytes], f: Ti.TextIO): if isinstance(data, str): print(data) f.write(data + "\n") else: print( "unexpected binary data: ", data.decode("utf8", errors="ignore"), file=sys.stderr, )
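# Illustrative call (log file name assumed): text packets are echoed to stdout and
# appended to the log; binary payloads are only reported on stderr and never written.
with open("packets.log", "a", encoding="utf-8") as log:
    handle_packet("PING seq=1", log)
    handle_packet(b"\x00\x01 binary payload", log)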
def __write_header(self, file_handle: TextIO) -> None: """Write a file header for present settings. Arguments: file_handle: The open file to write to """ file_handle.write("# {0}\n".format(datetime.now().isoformat())) file_handle.write('# {}\n'.format(self._comment)) file_handle.write("# maximum voltage {0} V\n".format(self._max_voltage)) file_handle.write("# current limit {0} A\n".format(self._current_limit)) file_handle.write('# nplc {}\n'.format(self._nplc)) file_handle.write("Voltage Current\n")
def del_with_h2(s: str, level: int, file: TextIO):
    """Extract the text of an <h{level}> element from ``s`` and write it as a Markdown heading."""
    tem_s_html = etree.HTML(s)
    xpath = "//h{}//text()".format(level)
    tem_s = "".join(tem_s_html.xpath(xpath)).strip()
    # Drop any leftover Markdown-style links, e.g. "[anchor](#target)".
    tem_s = re.sub(r"\[.*?\]\(.*?\)", "", tem_s, flags=re.S)
    line = "#" * level + " " + tem_s + "\n"
    file.write(line)
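# Illustrative call (HTML fragment and file name are assumptions): extract the <h2>
# text from a snippet and emit it as a level-2 Markdown heading.
with open("out.md", "w", encoding="utf-8") as out:
    del_with_h2("<h2>Getting started</h2>", 2, out)
    # writes "## Getting started"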
def _write_disease_tensor_maps(phenos_folder: str, f: TextIO) -> None: f.write(f"\n\n# TensorMaps for MPG disease phenotypes\n") disease2tsv = get_disease2tsv(phenos_folder) logging.info(f"Got:{len(disease2tsv)} disease TSVs from:{phenos_folder}") status = disease_censor_status(disease2tsv, 1000000, 5000000) logging.info(f"Got status for all diseases.") for d in sorted(list(disease2tsv.keys())): total = len(status[d]) diseased = np.sum(list(status[d].values())) factor = int(total / (1 + diseased * 2)) f.write( f"{d} = TensorMap('{d}', Interpretation.CATEGORICAL, storage_type=StorageType.CATEGORICAL_FLAG, path_prefix='categorical', " f"channel_map={{'no_{d}':0, '{d}':1}}, loss=weighted_crossentropy([1.0, {factor}], '{d}'))\n", ) logging.info(f"Done writing TensorMaps for diseases.")
def _write_phecode_tensor_maps(f: TextIO, phecode_csv, db_client: DatabaseClient): # phecode_csv = '/home/sam/phecode_definitions1.2.csv' total_samples = 500000 remove_chars = ";.,/()-[]&' " phecode2phenos = {} with open(phecode_csv, 'r') as my_csv: lol = list(csv.reader(my_csv, delimiter=',')) for row in lol[1:]: pheno = row[1].strip().replace("'s", "s") for c in remove_chars: pheno = pheno.replace(c, '_') pheno = pheno.lower().strip('_').replace('___', '_').replace('__', '_') phecode2phenos['phecode_' + row[0].lstrip('0').strip()] = pheno query = f"select disease, count(disease) as total from `broad-ml4cvd.ukbb7089_201904.phecodes_nonzero` GROUP BY disease" count_result = db_client.execute(query) phecode2counts = {} for row in count_result: phecode2counts[row['disease']] = float(row['total']) f.write(f"\n\n# TensorMaps for Phecode disease phenotypes\n") for k, p in sorted(phecode2phenos.items(), key=operator.itemgetter(1)): if k in phecode2counts: factor = int(total_samples / (1 + phecode2counts[k])) f.write( f"{p}_phe = TensorMap('{k}', Interpretation.CATEGORICAL, channel_map={{'no_{p}':0, '{p}':1}}, path_prefix='categorical', " f"storage_type=StorageType.CATEGORICAL_FLAG, loss=weighted_crossentropy([1.0, {factor}], '{k.replace('.', '_')}'))\n", ) query = f"select disease, count(disease) as total from `broad-ml4cvd.ukbb7089_201904.phecodes_nonzero` WHERE prevalent_disease=1 GROUP BY disease" count_result = db_client.execute(query) phecode2prevalent = {} for row in count_result: phecode2prevalent[row['disease']] = float(row['total']) query = f"select disease, count(disease) as total from `broad-ml4cvd.ukbb7089_201904.phecodes_nonzero` WHERE incident_disease=1 GROUP BY disease" count_result = db_client.execute(query) phecode2incident = {} for row in count_result: phecode2incident[row['disease']] = float(row['total']) f.write( f"\n\n# TensorMaps for prevalent and incident Phecode disease phenotypes\n" ) for k, p in sorted(phecode2phenos.items(), key=operator.itemgetter(1)): if k in phecode2incident and k in phecode2prevalent: factor_i = int(total_samples / (1 + phecode2incident[k])) factor_p = int(total_samples / (1 + phecode2prevalent[k])) f.write( f"{p}_phe_pi = TensorMap('{k}', Interpretation.CATEGORICAL, storage_type=StorageType.CATEGORICAL_FLAG, " f"path_prefix='categorical', tensor_from_file=prevalent_incident_tensor('dates/enroll_date', 'dates/{k}_date'), " f"channel_map={{'no_{p}':0, '{p}_prevalent':1, '{p}_incident':2}}, " f"loss=weighted_crossentropy([1.0, {factor_p}, {factor_i}], '{p}_pi'))\n", )
def write_text_record(stream: TextIO, record: MarcRecord) -> None:
    """
    Save a record to a file in the IRBIS plain-text exchange format.

    :param stream: output file
    :param record: record to write
    :return: None
    """
    assert stream
    assert record

    for field in record.fields:
        parts = ['#' + str(field.tag) + ': ' + safe_str(field.value)]
        for subfield in field.subfields:
            parts.append(str(subfield))
        line = ''.join(parts) + '\n'
        stream.write(line)
    stream.write(STOP_MARKER + '\n')
def to_txt(self, f: TextIO, expression_data=None): if expression_data is None: from copy import copy expression_data = copy(self.joined) columns = expression_data.columns pandas.options.mode.chained_assignment = None expression_data['Description'] = 'na' expression_data = expression_data[['Description', *columns]] if type(expression_data.index[0]) is bytes: expression_data.index = [b.decode('utf-8') for b in expression_data.index] expression_data.index = expression_data.index.astype(str) expression_data.index.name = 'gene' header = '\t'.join([expression_data.index.name, *expression_data.columns]) + '\n' f.write(header) savetxt( f, expression_data.reset_index().values, delimiter='\t', # entrez id or symbol (as str), 'na' (not a description, str), *data (floats) fmt='%s\t%s' + ('\t%f' * (len(expression_data.columns) - 1)) )
def del_with_blockquote(tag: Element, file: TextIO):
    """
    Convert a <blockquote> element into Markdown quote lines.

    :param tag: element containing the <blockquote>
    :param file: output file
    """
    children = tag.xpath("/html/body/blockquote/*")
    for i in children:
        sub = etree.tostring(i, encoding="utf-8", pretty_print=True,
                             method="html").decode()
        sub_html = etree.HTML(sub)
        tag_p = sub_html.xpath("/html/body/p")
        if tag_p:
            line = ">" + "".join(tag_p[0].xpath(".//text()")).replace(
                "\n", "") + "\n"
            file.write(line)
        else:
            del_with_ul(sub_html, prefix=">", file=file)
def _generate_one_batch(self, generator: 'models.GeneratorModel', batcher: Optional[batchers.Batcher] = None, src_batch: batchers.Batch = None, ref_batch: Optional[batchers.Batch] = None, assert_scores: Optional[List[int]] = None, max_src_len: Optional[int] = None, fp: TextIO = None): """ Generate outputs for a single batch and write them to the output file. """ batch_size = len(src_batch) if ref_batch[0] is not None: src_batches, ref_batches = batcher.pack(src_batch, ref_batch) ref_batch = ref_batches[0] else: src_batches = batcher.pack(src_batch, None) ref_batch = None src_batch = src_batches[0] src_len = src_batch.sent_len() if max_src_len is not None and src_len > max_src_len: output_txt = "\n".join([NO_DECODING_ATTEMPTED] * batch_size) fp.write(f"{output_txt}\n") else: with utils.ReportOnException({"src": src_batch, "graph": utils.print_cg_conditional}): dy.renew_cg(immediate_compute=settings.IMMEDIATE_COMPUTE, check_validity=settings.CHECK_VALIDITY) outputs = self.generate_one(generator, src_batch, ref_batch) if self.reporter: self._create_sent_report() for i in range(len(outputs)): if assert_scores[0] is not None: # If debugging forced decoding, make sure it matches assert batch_size == len(outputs), "debug forced decoding not supported with nbest inference" if (abs(outputs[i].score - assert_scores[i]) / abs(assert_scores[i])) > 1e-5: raise ValueError( f'Forced decoding score {outputs[i].score} and loss {assert_scores[i]} do not match at ' f'sentence {i}') output_txt = outputs[i].sent_str(custom_output_procs=self.post_processor) fp.write(f"{output_txt}\n")
def _write_disease_tensor_maps_incident_prevalent(phenos_folder: str, f: TextIO) -> None: f.write( f"\n\n# TensorMaps for prevalent and incident MPG disease phenotypes\n" ) disease2tsv = get_disease2tsv(phenos_folder) logging.info(f"Got:{len(disease2tsv)} disease TSVs from:{phenos_folder}") status_p = disease_prevalence_status(disease2tsv, 1000000, 2500000) status_i = disease_incidence_status(disease2tsv, 1000000, 2500000) logging.info(f"Got prevalence and incidence status for all diseases.") for disease in sorted(list(disease2tsv.keys())): total = len(status_p[disease]) diseased_p = np.sum(list(status_p[disease].values())) factor_p = int(total / (1 + (diseased_p * 3))) diseased_i = np.sum(list(status_i[disease].values())) factor_i = int(total / (1 + (diseased_i * 3))) f.write( f"{disease}_prevalent_incident = TensorMap('{disease}', Interpretation.CATEGORICAL, storage_type=StorageType.CATEGORICAL_FLAG, " f"path_prefix='categorical', tensor_from_file=prevalent_incident_tensor('dates/enroll_date', 'dates/{disease}_date'), " f"channel_map={{'no_{disease}':0, 'prevalent_{disease}':1, 'incident_{disease}':2}}, " f"loss=weighted_crossentropy([1.0, {factor_p}, {factor_i}], '{disease}_prevalent_incident'))\n", ) logging.info( f"Done writing TensorMaps for prevalent and incident diseases.")
def del_with_table(sub: Element, file: TextIO):
    # Header row.
    col_name = sub.xpath("//thead/tr/th/text()")
    col_num = len(col_name)
    row_head = "| " + " | ".join(col_name) + " |\n"
    file.write(row_head)
    # Centred separator row.
    tem_list = "| " + " | ".join([":-----:" for _ in range(col_num)]) + " |\n"
    file.write(tem_list)
    # Table body.
    trs = sub.xpath("//tbody/tr")
    for tr in trs:
        row = tr.xpath("./td/text()")
        row_each = "| " + " | ".join(row).replace("\n", "") + " |\n"
        file.write(row_each)
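# Sketch of how del_with_table might be driven (input HTML and file name are assumed):
# parse an HTML table with lxml and convert it to a Markdown table with a centred
# separator row.
from lxml import etree

html = ("<table><thead><tr><th>name</th><th>age</th></tr></thead>"
        "<tbody><tr><td>Ada</td><td>36</td></tr></tbody></table>")
with open("table.md", "w", encoding="utf-8") as out:
    del_with_table(etree.HTML(html), out)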
def write_absolute_dir_to_file(dir_name, file: TextIO):
    relative_child_dir = "/".join(
        dir_name.split("/")[settings.init_dir_offset:])
    dir_name = relative_child_dir.split("/")[-1]
    offset = len(relative_child_dir.split("/"))
    whitespace_offset = " " * (offset - 1)
    if (dir_name in settings.terminal_dir) or (dir_name in settings.semiterminal_dir):
        if dir_name in settings.external_repo:
            link = "[{}]({})".format(dir_name,
                                     settings.external_repo.get(dir_name))
        else:
            link = "[{}]({})".format(dir_name, relative_child_dir)
        file.write(whitespace_offset + "* " + link)
    else:
        file.write(whitespace_offset + "* " + dir_name)
    file.write("\n")
def __write_header(self, file_handle: TextIO) -> None: file_handle.write("# {0}\n".format(datetime.now().isoformat())) file_handle.write('# {}\n'.format(self._comment)) file_handle.write('# {} Hz\n'.format(self._device.freq)) file_handle.write('# {} V\n'.format(self._device.slvl)) file_handle.write('# {} Time constant\n'.format(self._device.oflt)) file_handle.write("# pre resistance {0} OHM\n".format( self._pre_resistance)) file_handle.write("# sweep rate {0} T/min\n".format(self._sweep_rate)) file_handle.write( "Datetime Field Real Imaginary Amplitude Theta Sensitivity T1 T2 T3\n" )
def hash_files(docs: Iterable[DocFile], out: TextIO): out.write('{\n') out.write('"generator": "stramp",\n') out.write('"documents": [\n') first = True for doc in docs: if doc.file_format == 'org': from stramp.parsers.org_parser import load_file as load_org_file load_org_file(doc) elif doc.file_format in ('commonmark', 'markdown'): from stramp.parsers.markdown_parser import load_file as load_markdown_file load_markdown_file(doc) else: raise ValueError(f'Unsupported file format {doc.file_format!r}') with io.StringIO() as file_json: # type: Union[TextIO, io.StringIO] write_file_hash_json(doc, file_json) if not first: out.write(',\n') first = False out.write(file_json.getvalue()) out.write(']}\n')
def analyse_od(model: str, dataset: str, split: str, pivot_file: TextIO):
    """
    Evaluate object-detection predictions for one model/dataset/split combination,
    appending per-iteration holdout accuracy, update accuracy and cumulative
    correction counts as CSV rows to ``pivot_file``.
    """
    if split == "kh":
        return

    source_dataset = load_dataset(f"{dataset}.txt")
    label_indices = get_label_indices(source_dataset)
    numeric_labels = list(range(len(label_indices)))
    num_labels = len(numeric_labels)
    split_name = split if split != "kh" else f"kh-{model}"
    split_path = f"{dataset}.strat-0.15.{split_name}.splits"
    holdout_dataset = load_dataset(os.path.join(split_path, "holdout.txt"))
    schedule_dataset = load_dataset(os.path.join(split_path, "schedule.txt"))
    y_true = [label_indices[label] for label in holdout_dataset.values()]
    splitter = TopNSplitter(50)
    iteration = 0
    cumulative_corrections = 0
    _, remaining_dataset = splitter(schedule_dataset)
    while True:
        holdout_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/predictions")
        if not os.path.exists(holdout_predictions_path):
            break
        holdout_predictions = load_rois_predictions(holdout_predictions_path,
                                                    holdout_dataset, num_labels)
        y_score = list(holdout_predictions.values())
        y_score = [
            coerce_incorrect(num_labels, truth, prediction)
            for truth, prediction in zip(y_true, y_score)
        ]
        top_1 = top_k_accuracy_score(y_true,
                                     y_score,
                                     k=1,
                                     labels=numeric_labels,
                                     normalize=True)
        pivot_file.write(",".join(
            map(str, [
                model, dataset, split, iteration, "holdout", "accuracy", top_1
            ])) + "\n")
        update_dataset, remaining_dataset = splitter(remaining_dataset)
        update_predictions_path = os.path.join(
            split_path, f"{model}/{iteration}/update_predictions")
        if os.path.exists(update_predictions_path):
            update_y_true = [
                label_indices[label] for label in update_dataset.values()
            ]
            update_predictions = load_rois_predictions(update_predictions_path,
                                                       update_dataset, num_labels)
            update_y_score = list(update_predictions.values())
            update_y_score = [
                coerce_incorrect(num_labels, truth, prediction)
                for truth, prediction in zip(update_y_true, update_y_score)
            ]
            update_top_1 = top_k_accuracy_score(update_y_true,
                                                update_y_score,
                                                k=1,
                                                labels=numeric_labels,
                                                normalize=True)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update", "accuracy",
                    update_top_1
                ])) + "\n")
            cumulative_corrections += int((1 - update_top_1) * 50)
            pivot_file.write(",".join(
                map(str, [
                    model, dataset, split, iteration, "update",
                    "cumulative_corrections", cumulative_corrections
                ])) + "\n")
        iteration += 1
def __write_header(self, file_handle: TextIO) -> None: file_handle.write("# {0}\n".format(datetime.now().isoformat())) file_handle.write('# {}\n'.format(self._comment)) file_handle.write('# {} V\n'.format(self._voltage)) file_handle.write('# {} A-max\n'.format(self._current_limit)) file_handle.write("# sweep rate {0} K/min\n".format(self._sweep_rate)) file_handle.write("Datetime Voltage Current T1 T2 T3\n")
def write_look_ml(f: TextIO, info: TableInfo): # write view f.write(f'view: {info.dataset_id}__{info.clear_name} {{\n') f.write( f' sql_table_name: `{info.project_id}.{info.dataset_id}.{info.clear_name}' ) if info.is_sharding(): f.write('_*') f.write('`\n ;;\n\n') for field in info.schema: write_field(f, field) # measure count f.write(' measure: count {\n') f.write(' type: count\n') drill_fields: List[str] = [] if 'id' in [field.name for field in info.schema]: drill_fields.append('id') if 'name' in [field.name for field in info.schema]: drill_fields.append('name') f.write(f' drill_fields: [{", ".join(drill_fields)}]\n') f.write(' }\n') f.write('}\n\n') for field in filter(lambda x: x.field_type == 'RECORD', info.schema): write_record_child(f, field, prefix=f'{info.dataset_id}__{info.clear_name}__')
def del_with_p(data, file: TextIO): line = "".join(data[0].xpath(".//text()")).replace("\n", "") + "\n" file.write(line)
def to_gct(self, f: TextIO, tabular_writer='to_txt'): f.write('#1.2\n') expression_data = self.joined assert expression_data.notnull().all().all() f.write(f'{len(expression_data)}\t{len(expression_data.columns)}\n') getattr(self, tabular_writer)(f, expression_data)
def write_field(f: TextIO, field: bigquery.SchemaField): if field.field_type in ['TIME', 'TIMESTAMP', 'DATE', 'DATETIME']: f.write(f' dimension_group: {field.name} {{\n') else: f.write(f' dimension: {field.name} {{\n') if field.name == 'id': f.write(' primary_key: yes\n') if field.field_type in ['INTEGER', 'FLOAT', 'NUMERIC']: f.write(' type: number\n') elif field.field_type == 'BOOLEAN': f.write(' type: yesno\n') elif field.field_type in ['TIME', 'TIMESTAMP', 'DATE', 'DATETIME']: f.write(' type: time\n') f.write(' timeframes: [\n') f.write(' raw,\n') if field.field_type != 'DATE': f.write(' time,\n') f.write(' date,\n') f.write(' week,\n') f.write(' month,\n') f.write(' quarter,\n') f.write(' year\n') f.write(' ]\n') if field.field_type == 'DATE': f.write(' convert_tz: no\n') f.write(' datatype: date\n') elif field.field_type == 'RECORD': f.write(' hidden: yes\n') else: f.write(' type: string\n') f.write(f' sql: ${{TABLE}}.{field.name} ;;\n') f.write(' }\n\n')
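# Hypothetical usage (output path and field names are assumptions; bigquery is taken to
# be google.cloud.bigquery, matching the type hints above): generate LookML dimensions
# for a couple of schema fields; TIMESTAMP fields become dimension_groups with timeframes.
from google.cloud import bigquery

with open("orders.view.lkml", "w", encoding="utf-8") as f:
    write_field(f, bigquery.SchemaField("id", "INTEGER"))
    write_field(f, bigquery.SchemaField("created_at", "TIMESTAMP"))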
def write_svg(self, f: TextIO): f.write(f'<{self.diagram_item.name}') for name, value in sorted(self.diagram_item.attrs.items()): f.write(f' {name}="{e(value)}"') f.write(f' data-dbg-cls="{self.diagram_item.__class__.__name__}"' f' data-dbg-w="{self.diagram_item.width}"') f.write('>') for child in self.children: if isinstance(child, FormattedItem): child.write_svg(f) else: f.write(e(child)) f.write(f'</{self.diagram_item.name}>')
def save_history_for_conversation(self, f: TextIO, id: int = 0):
    self.update_status_history_for_conversation(id)
    dump(self.status_per_conversation[id], f)
    f.write('\n')
def format(self, f: TextIO) -> None: width: List[int] = [] columns: List[str] = [] rows: List[List[str]] = [] for column in self.row_set.columns(): name = str(column.name) columns.append(name) width.append(len(name)) for row in self.row_set.iter(): expressions: List[str] = [] for (column, cell) in zip(self.row_set.columns(), row.data): expr: ExpressionLiteral = column.ty.construct(cell) sql = expr.to_sql() expressions.append(sql) rows.append(expressions) for row in rows: for (i, cell) in enumerate(row): width[i] = max(len(cell), width[i]) f.write(' ') # shift to the right for the sliding effect f.write( self.separator.join(c.center(w) for (c, w) in zip(columns, width))) f.write(os.linesep) f.write(self.separator.join(''.ljust(w, '/') for w in width)) f.write(os.linesep) for row in rows: f.write( self.separator.join(c.ljust(w) for (c, w) in zip(row, width))) f.write(os.linesep)