def check_spans(doc_fn, tag_fn, options):
    """Verify that every tagged span's text matches the document text.

    Reads documents from doc_fn and spans from tag_fn, reports each
    mismatch, and prints a final summary of errors and mismatches.
    """
    doc_count, span_count, mismatches = 0, 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            span_reader = SpanReader(tag_f)
            for doc in doc_reader:
                for span in span_reader.document_spans(doc.id):
                    # span.end is inclusive, hence the +1
                    doc_span_text = doc.text[span.start:span.end + 1]
                    if doc_span_text != span.text:
                        dt, st = safe_str(doc_span_text), safe_str(span.text)
                        # BUG FIX: the closing quote was misplaced, producing
                        # 'vs "<text>: <span>"' instead of 'vs "<text>": <span>'
                        print(f'text mismatch in {doc.id}: "{dt}" '
                              f'vs "{st}": {span}')
                        mismatches += 1
                    span_count += 1
                doc_count += 1
                if doc_count % 10000 == 0:
                    print(f'processed {doc_count} documents '
                          f'({span_count} spans)', file=sys.stderr)
            # final count taken from the reader itself
            # NOTE(review): iter.index - 1 presumably excludes a header or
            # sentinel line -- confirm against SpanReader
            span_count, errors = span_reader.iter.index - 1, span_reader.errors
            if span_reader.current_doc_id() is not None:
                print(f'ERROR: extra lines in {tag_fn}')
    if mismatches or errors:
        print(f'Checked {span_count} spans, found {errors} errors '
              f'and {mismatches} mismatches')
    else:
        print(f'OK, checked {span_count} spans')
def cut_tags(doc_fn, tag_fn, out_fn, options):
    """Map spans onto (possibly cut) documents and write the survivors.

    Documents with no offset map are copied through untouched; otherwise
    spans are re-mapped and those falling outside the kept text dropped.
    """
    removed, total = 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            span_reader = SpanReader(tag_f, no_type_mapping=True)
            with open_file(out_fn, 'w', options) as out_f:
                for doc_idx, doc in enumerate(doc_reader):
                    offset_map = get_offset_map(doc, options)
                    if offset_map is None:
                        # no-op, quick copy without parsing
                        for span in span_reader.document_lines(doc.id):
                            print(span, end='', file=out_f)
                            total += 1
                    else:
                        # need to parse, map and filter
                        spans = list(span_reader.document_spans(doc.id))
                        mapped = apply_offset_map(spans, offset_map)
                        removed += len(spans) - len(mapped)
                        total += len(spans)
                        for span in mapped:
                            print(span, file=out_f)
                    if (doc_idx+1) % 100000 == 0:
                        print(f'processed {doc_idx+1} documents',
                              file=sys.stderr)
    # BUG FIX: guard against ZeroDivisionError when no spans were seen
    ratio = removed / total if total else 0.0
    print(f'removed {removed}/{total} spans ({ratio:.1%})', file=sys.stderr)
def cut_documents(doc_fn, out_fn, options):
    """Apply cut_document to each document and write results to out_fn."""
    cut_count, doc_count = 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(out_fn, 'w', options) as out_f:
            for doc_idx, doc in enumerate(doc_reader):
                cut_count += cut_document(doc, options)
                print(doc, file=out_f)
                doc_count = doc_idx + 1
                if doc_count % 100000 == 0:
                    print(f'processed {doc_count} documents', file=sys.stderr)
    # BUG FIX: the summary used doc_idx (count-1) as the denominator and
    # raised NameError/ZeroDivisionError on empty input
    ratio = cut_count / doc_count if doc_count else 0.0
    print(f'cut {cut_count}/{doc_count} documents ({ratio:.1%})',
          file=sys.stderr)
def filter_tags(tag_fn, out_fn, ids, options):
    """Copy tag lines whose leading (tab-separated) id is in `ids`."""
    out_count, ln = 0, 0
    with open_file(tag_fn, 'r', options) as tag_f:
        with open_file(out_fn, 'w', options) as out_f:
            for ln, line in enumerate(tag_f, start=1):
                id_ = line.split('\t')[0]
                if id_ in ids:
                    # lines keep their own newline, so suppress print's
                    print(line, file=out_f, end='')
                    out_count += 1
                if ln % 100000 == 0:
                    print(f'processed {ln}, output {out_count}',
                          file=sys.stderr)
    # BUG FIX: `ln` was undefined for an empty file and the ratio divided
    # by zero; initialize it and guard the division
    ratio = out_count / ln if ln else 0.0
    print(f'output {out_count}/{ln} lines ({ratio:.1%})', file=sys.stderr)
def filter_documents(doc_fn, out_fn, ids, options):
    """Copy documents whose id is in `ids` from doc_fn to out_fn."""
    out_count, doc_count = 0, 0
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(out_fn, 'w', options) as out_f:
            for doc_idx, doc in enumerate(doc_reader):
                if doc.id in ids:
                    # NOTE(review): per-document flush looks deliberate
                    # (progress visibility); kept as-is
                    print(doc, file=out_f, flush=True)
                    out_count += 1
                doc_count = doc_idx + 1
                if doc_count % 100000 == 0:
                    print(f'processed {doc_count}, output {out_count}',
                          file=sys.stderr)
    # BUG FIX: summary divided by doc_idx (count-1) and crashed with
    # NameError/ZeroDivisionError on empty input
    ratio = out_count / doc_count if doc_count else 0.0
    print(f'output {out_count}/{doc_count} documents ({ratio:.1%})',
          file=sys.stderr)
def convert_to_standoff(doc_fn, tag_fn, out_dir, options):
    """Convert documents and their spans into standoff files under out_dir.

    Delegates per-document work to convert_single; conversion failures are
    logged and re-raised.
    """
    # (removed unused local NOTE_TYPE = 'AnnotatorNotes')
    with open_file(doc_fn, 'r', options) as doc_f:
        doc_reader = DocReader(doc_f)
        with open_file(tag_fn, 'r', options) as tag_f:
            # Read spans that include source information
            span_reader = SpanReader(tag_f, source=True)
            for doc in doc_reader:
                spans = list(span_reader.document_spans(doc.id))
                try:
                    convert_single(doc, spans, out_dir, options)
                except Exception as e:
                    error(f'failed to convert {doc.id}: {e}')
                    raise
def modify_init_rc(directory):
    """Rewrite init.rc for dual-boot: export ANDROID_CACHE, duplicate the
    mkdir lines for raw-* mount points, and comment out yaffs2 lines."""
    source = c.get_lines_from_file(directory, 'init.rc')
    out = c.open_file(directory, 'init.rc', c.WRITE)
    # each mkdir for these paths is kept and followed by a raw-* twin
    renames = (('/system', '/raw-system'),
               ('/data', '/raw-data'),
               ('/cache', '/raw-cache'))
    for entry in source:
        if 'export ANDROID_ROOT' in entry:
            c.write(out, entry)
            c.write(out, c.whitespace(entry) + "export ANDROID_CACHE /cache\n")
            continue
        for mount, raw in renames:
            if re.search(r"mkdir %s(\s|$)" % mount, entry):
                c.write(out, entry)
                c.write(out, re.sub(mount, raw, entry))
                break
        else:
            if 'yaffs2' in entry:
                c.write(out, re.sub(r"^", "#", entry))
            else:
                c.write(out, entry)
    out.close()
def modify_init_target_rc(directory):
    """Comment out direct /cache handling in init.target.rc and hook the
    dual-boot mount script right after `mount_all fstab.qcom` under `on fs`."""
    source = c.get_lines_from_file(directory, 'init.target.rc')
    out = c.open_file(directory, 'init.target.rc', c.WRITE)
    # any line matching one of these patterns gets commented out
    disable_patterns = (
        r"^\s+wait\s+/dev/.*/cache.*$",
        r"^\s+check_fs\s+/dev/.*/cache.*$",
        r"^\s+mount\s+ext4\s+/dev/.*/cache.*$",
    )
    previous_line = ""
    for entry in source:
        if any(re.search(pat, entry) for pat in disable_patterns):
            c.write(out, re.sub(r"^", "#", entry))
        elif (re.search(r"^\s+mount_all\s+fstab.qcom.*$", entry)
                and re.search(r"^on\s+fs\s*$", previous_line)):
            c.write(out, entry)
            c.write(out, c.whitespace(entry) + "exec /sbin/busybox-static sh /init.dualboot.mounting.sh\n")
        else:
            c.write(out, entry)
        previous_line = entry
    out.close()
def modify_fstab(directory):
    """Replace system/cache/data entries with raw-* block-device lines in the
    fstabs used by Google Edition ROMs, appending a cache entry if absent."""
    cache_entry = "/dev/block/platform/msm_sdcc.1/by-name/cache /raw-cache ext4 nosuid,nodev,barrier=1 wait,check\n"
    # Ignore all contents for Google Edition
    for fstab in ['fstab.qcom', 'fstab.jgedlte']:
        source = c.get_lines_from_file(directory, fstab)
        cache_written = False
        out = c.open_file(directory, fstab, c.WRITE)
        for entry in source:
            if re.search(r"^/dev[a-zA-Z0-9/\._-]+\s+/system\s+.*$", entry):
                c.write(out, "/dev/block/platform/msm_sdcc.1/by-name/system /raw-system ext4 ro,errors=panic wait\n")
            elif re.search(r"^/dev[^\s]+\s+/cache\s+.*$", entry):
                c.write(out, cache_entry)
                cache_written = True
            elif re.search(r"^/dev[^\s]+\s+/data\s+.*$", entry):
                c.write(out, "/dev/block/platform/msm_sdcc.1/by-name/userdata /raw-data ext4 nosuid,nodev,noatime,noauto_da_alloc,discard,journal_async_commit,errors=panic wait,check,encryptable=footer\n")
            else:
                c.write(out, entry)
        if not cache_written:
            # ensure a raw cache mount exists even if none was present
            c.write(out, cache_entry)
        out.close()
def modify_fstab(directory):
    """Redirect /system, /cache and /data mounts in fstab.qcom to their
    raw-* counterparts (Android 4.2 ROM layout)."""
    lines = c.get_lines_from_file(directory, 'fstab.qcom')
    # For Android 4.2 ROMs
    has_cache_line = False
    f = c.open_file(directory, 'fstab.qcom', c.WRITE)
    for line in lines:
        # BUG FIX: the substitution patterns below were non-raw strings,
        # so "\s" was an invalid escape sequence (DeprecationWarning in
        # Python 3); they are now raw strings
        if re.search(r"^/dev[a-zA-Z0-9/\._-]+\s+/system\s+.*$", line):
            c.write(f, re.sub(r"\s/system\s", " /raw-system ", line))
        elif re.search(r"^/dev[^\s]+\s+/cache\s+.*$", line):
            c.write(f, re.sub(r"\s/cache\s", " /raw-cache ", line))
            has_cache_line = True
        elif re.search(r"^/dev[^\s]+\s+/data\s+.*$", line):
            c.write(f, re.sub(r"\s/data\s", " /raw-data ", line))
        else:
            c.write(f, line)
    if not has_cache_line:
        # BUG FIX: the appended entry was missing its trailing newline
        # (the Google-Edition variant of this function writes one)
        c.write(f, "/dev/block/platform/msm_sdcc.1/by-name/cache /raw-cache ext4 nosuid,nodev,barrier=1 wait,check\n")
    f.close()
def main():
    """Parse command-line arguments and run the word-positioning pipeline
    (term search, TikZ/LaTeX diagram rendering, optional HTML passages)."""
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Position occurrences of search words in file and produce latex output and statistics. Default settings in settings.py-file')
    parser.add_argument('input', nargs='?',
                        help='The file to be parsed. If none is given, the settings-file is used.')
    parser.add_argument('output', nargs='?',
                        help='Full filename of the selected output.')
    parser.add_argument('--stats', '-s',
                        help='(Not implemented yet). Calculate and output statistics. Default = false.',
                        action='store_true', default=False)
    parser.add_argument('--passages', '-p',
                        help='Print all passages and exceptions in search terms to file. Default = false.',
                        action='store_true', default=False)
    parser.add_argument('--tex', '-t',
                        help='Create output to LaTeX file with TikZ formatted diagram. Choose whether it goes to shell, file or both. Default = shell.',
                        action='store', choices=['shell', 'file', 'both', 'none'],
                        default='shell')
    parser.add_argument('--log', '-l',
                        help='Set the log level (output to shell). Default = WARNING.',
                        default='INFO')
    args = parser.parse_args()
    # Set log level and initiate logging module
    loglevel = args.log
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    logging.getLogger(__name__)
    # Read the string
    string = common.open_file(args.input)
    names, terms, exceptions = separate_terms(settings.terms)
    occurrences = create_occurrence_lists(terms, exceptions, string)
    output_data = prepare_diagram_data(occurrences, names, string)
    # BUG FIX: `args.tex is not 'none'` compared object identity against a
    # string literal (SyntaxWarning in Python 3.8+, unreliable result);
    # use equality instead
    if args.tex != 'none':
        render_tex(args.tex, set_filename(args.output, '.tex'), *output_data)
    if args.passages:
        render_passages_in_html(terms, exceptions, string,
                                set_filename("/tmp/output_dump.html", '.html'))
def modify_MSM8960_lpm_rc(directory):
    """Comment out every /cache mount line in MSM8960_lpm.rc."""
    source = c.get_lines_from_file(directory, 'MSM8960_lpm.rc')
    out = c.open_file(directory, 'MSM8960_lpm.rc', c.WRITE)
    cache_mount = re.compile(r"^\s+mount.*/cache.*$")
    for entry in source:
        # prefixing "#" is equivalent to the original re.sub(r"^", "#", ...)
        c.write(out, "#" + entry if cache_mount.search(entry) else entry)
    out.close()
def convert_single(doc, spans, out_dir, options):
    """Write one document as standoff output: a .txt with the document text
    and a .ann with T (span) and N (serial reference) annotation lines."""
    for span in spans:
        span.type = normalize_type(span.type)
    spans = deduplicate_spans(spans, options)
    txt_path = os.path.join(out_dir, f'{doc.id}.txt')
    with open_file(txt_path, 'w', options) as txt_out:
        # tabs separated segments in the packed form; emit them as lines
        print(doc.text.replace('\t', '\n'), file=txt_out)
    offset_map = make_offset_map(doc.text)
    ann_path = os.path.join(out_dir, f'{doc.id}.ann')
    with open_file(ann_path, 'w', options) as ann_out:
        note_id = 1
        for span_id, span in enumerate(spans, start=1):
            # span.end is inclusive; map both ends through the offset map
            start = offset_map[span.start]
            end = offset_map[span.end + 1]
            # spans confirmed by both sources drop the source suffix
            if len(span.sources) == 2:
                label = f'{span.type}'
            else:
                label = f'{span.type}-{span.source}'
            print(f'T{span_id}\t{label} {start} {end}\t{span.text}',
                  file=ann_out)
            for serial in span.serials:
                if serial != DUMMY_SERIAL:
                    print(f'N{note_id}\tReference T{span_id} string:{serial}',
                          file=ann_out)
                    note_id += 1
def modify_init_qcom_rc(directory):
    """Point /data/media references in init.qcom.rc at /raw-data/media."""
    source = c.get_lines_from_file(directory, 'init.qcom.rc')
    out = c.open_file(directory, 'init.qcom.rc', c.WRITE)
    media_ref = re.compile(r"/data/media(\s|$)")
    for entry in source:
        if media_ref.search(entry):
            # Change /data/media to /raw-data/media
            c.write(out, re.sub('/data/media', '/raw-data/media', entry))
        else:
            c.write(out, entry)
    out.close()
def xconf_iter(self, fname):  # {{{1
    # type: (Text) -> Iterable[Tuple[int, Text, Text]]
    """Iterate an xorg.conf-style file, yielding (lineno, section, line).

    Lines outside any recognized section are yielded with an empty section
    name; Synaptics-section flags are fed line by line while in a section.
    """
    self.section_parser_clear()
    syn_flags = XSectionSynaptics()
    with common.open_file(fname, "r") as stream:
        for lineno, raw in enumerate(stream):
            section = self.section_parser(raw)
            if section < 0:
                # outside a section: drop accumulated flags, empty name
                syn_flags = XSectionSynaptics()
                yield (lineno, "", raw)
            else:
                syn_flags.parse_line(raw)
                yield (lineno, self.cur_section, raw)
def modify_init_qcom_rc(directory):
    """Rewrite init.qcom.rc: add an EXTERNAL_SD export after the emulated
    storage export and point /data/media at /raw-data/media."""
    source = c.get_lines_from_file(directory, 'init.qcom.rc')
    out = c.open_file(directory, 'init.qcom.rc', c.WRITE)
    for entry in source:
        if 'export EMULATED_STORAGE_TARGET' in entry:
            c.write(out, entry)
            c.write(out, c.whitespace(entry) + "export EXTERNAL_SD /storage/sdcard1\n")
        elif re.search(r"/data/media(\s|$)", entry):
            # Change /data/media to /raw-data/media
            c.write(out, re.sub('/data/media', '/raw-data/media', entry))
        else:
            c.write(out, entry)
    out.close()
def main():
    # Entry point: if list.txt is present, batch-process each URL in it
    # (currently in test mode) and exit; otherwise fall back to the
    # interactive start_main() flow.
    from os import path
    from common import open_file
    if path.exists('list.txt'):
        # one URL per line; lines starting with '#' are comments
        download_list = open_file('list.txt').split('\n')
        from common import url_to_module, start_download, test_download
        for i in download_list:
            if len(i) > 1 and i[0] == '#':
                continue
            m, url = url_to_module(i)
            if m is not None:
                info = m.get_info(url)
                # start_download(m, info)
                # NOTE(review): test_download instead of start_download --
                # looks like a debugging toggle; confirm intent
                test_download(m, info)
                # book_name = m.get_m80txt_info(url, True)
                # if len(book_name) > 1:
                #     test_download(m, book_name, info)
        exit(0)
        pass
    else:
        start_main()
def process(self, start=0, stop=None):
    """Parse self.path into objects delimited by START/END_PATTERN and keep
    those whose os_timestamp falls in (start, stop); result in self.data.

    BUG FIXES: the assert message referenced the undefined name `file`
    (now self.path), and the manual `idx` advancing had no effect inside a
    `for idx in range(...)` loop -- block interiors were re-scanned. The
    scan now uses an explicit while loop as originally intended.
    """
    assert os.path.exists(self.path), "'%s' does not exist!" % (self.path)
    data = []
    # cmdline = 'cat %s | diag_parser' % (file)
    # ret, stdout, stderr = pycommons.run(cmdline, log=False)
    # lines = stdout.split('\n')
    lines = []
    with open_file(self.path, "rb") as f:
        for line in f:
            lines.append(line.strip())
    idx = 0
    while idx < len(lines):
        line = lines[idx]
        if self.START_PATTERN.match(line):
            # collect every line up to and including the END marker
            obj_data = []
            while idx < len(lines):
                line = lines[idx]
                obj_data.append(line)
                if self.END_PATTERN.match(line):
                    break
                idx += 1
            obj = self.parse(obj_data)
            if not obj:
                # logger.debug("Could not convert: \n%s\n" % ('\n'.join(obj_data)))
                pass
            else:
                if obj.os_timestamp > start:
                    if not stop or (stop and obj.os_timestamp < stop):
                        data.append(obj)
        idx += 1
    self.data = data
def cmdreport(self):  # {{{2
    # type: () -> None
    """Collect system/xinput/app-settings info into a timestamped report
    file and tell the user where it was written.

    BUG FIXES: the python-2/3 branches were byte-identical (dead
    branching, collapsed); the file handle leaked if any write raised
    (now closed in a finally block).
    """
    import sys
    import platform
    from datetime import datetime
    fname = datetime.now().strftime("report-%Y%m%d-%H%M%S.txt")
    fp = open_file(fname, "a")
    try:
        msg = common.check_output(["uname", "-a"])
        fp.write(msg + "\n")
        msg = common.check_output(["python3", "-m", "platform"])
        fp.write(msg + "\n")
        fp.write("Python: {}\n".format(str(sys.version_info)))
        sbld = platform.python_build()
        scmp = platform.python_compiler()
        fp.write("Python: {} {}\n".format(sbld, scmp))
        msg = common.check_output(["xinput", "list"])
        fp.write(msg + u"\n")
        msg = common.check_output(["xinput", "list-props", Text(xi.dev)])
        fp.write(msg + u"\n")
        fp.write(u"\n\n--- current settings (in app)---\n")
        fp.write(xi.dumps())
        fp.write(u"\n\n--- initial settings (at app startup)---")
        cmds = u""
        for i in cmdorg:
            cmds += u"\n" + u" ".join(i)
        fp.write(cmds + "\n")
    finally:
        fp.close()
    msg = u"Report: {} was made,\n" \
          u"use this file to report a issue.".format(fname)
    messagebox.showinfo(u"Make a Report", msg)
def save_package(package, path):
    """Write `package` to `path` as pretty-printed (indent=2) JSON.

    BUG FIX: the handle returned by open_file was never closed; a context
    manager now guarantees the data is flushed and the file released.
    """
    # NOTE(review): assumes open_file returns a context manager -- it is
    # used in `with` statements elsewhere in this codebase
    with open_file(path, 'w') as f:
        json.dump(package, f, indent=2)
def save(self, fname, fnameIn, db):  # {{{1
    # type: (Text, Text, NPropDb) -> bool
    '''Write an updated xorg.conf: copy fnameIn line by line, passing
    unchanged options through, recording already-satisfied options in
    `done`, and deferring modified options to save_remains() which is
    invoked at each section boundary.

    sample output {{{3
    # Example xorg.conf.d snippet that assigns the touchpad driver
    # ...
    Section "InputClass"
            Identifier "touchpad catchall"
            Driver "synaptics"
            MatchIsTouchpad "on"
                Option "TapButton3" "2"
                Option "FingerLow" "50"
                Option "FingerHigh" "100"
                Option "VertTwoFingerScroll" "on"
                Option "HorizTwoFingerScroll" "on"
    # This option is recommend on all Linux systems using evdev,
    # ...
            MatchDevicePath "/dev/input/event*"
    EndSection
    # This option enables the bottom right corner to be a right button
    # ...
    Section "InputClass"
            Identifier "Default clickpad buttons"
            MatchDriver "synaptics"
            Option "SoftButtonAreas" "50% 0 82% 0 0 0 0 0"
            Option "SecondarySoftButtonAreas" "58% 0 0 15% 42% 58% 0 15%"
    EndSection
    # }}}
    '''
    fp = common.open_file(fname, "w")
    prv_sec = ""
    done = []  # type: List[Text]
    for i, sec, line in self.xconf_iter(fnameIn):
        if len(sec) < 1:
            # left a section: flush its pending properties before copying
            if len(prv_sec) > 0:
                self.save_remains(fp, db, prv_sec, done)
                prv_sec, done = "", []
            fp.write(line)
            continue
        prv_sec = sec
        # single-pass loop so `continue` falls through to the for-else
        # write-through branch and `break` suppresses the copy
        for i in range(1):
            tup = NProp.parse_xconfline(line)
            if tup is None:
                continue  # write through
            prop = tup[1]
            cur = db.get(sec, prop, NProp("", None, ""))
            if cur.key == "":
                done.append(tup[0])
                continue  # write through
            if prop.same_prop(cur):
                done.append(tup[0])
                continue  # write through
            # just update props, write at save_remains().
            cur.update_by_prop_passive(prop)
            break
        else:
            fp.write(line)
    # NOTE(review): a section still open at EOF is never flushed via
    # save_remains -- looks intentional per the original comment, confirm
    # did not close section...
    fp.close()
    return False
def save(file_obj, dependencies, path):
    """Insert `dependencies` into `file_obj` and write it to `path` as
    pretty-printed (indent=2) JSON.

    BUG FIX: the handle returned by open_file was never closed; a context
    manager now guarantees the data is flushed and the file released.
    """
    file_obj = insert_dependencies(file_obj, dependencies)
    with open_file(path, 'w') as f:
        json.dump(file_obj, f, indent=2)
def button_clicked(self, widget):
    """Open the file named in the entry field, parented to this window."""
    common.open_file(self.entry.get_text(), self._window)
def get_file(package_path):
    """Load and return the JSON content of `package_path`.

    Failures are logged (in red) and re-raised for the caller to handle.
    BUG FIX: the file handle was never closed; use a context manager.
    """
    try:
        with open_file(package_path) as f:
            return json.load(f)
    except Exception as ex:
        logging.error(Fore.RED + 'ERR: ' + str(ex))
        raise
if __name__ == '__main__':
    # CLI driver: parse offline mutasi/saldo HTML dumps, or validate the
    # credentials needed for a live session.
    import sys
    from optparse import OptionParser
    from pprint import pprint
    # NOTE(review): to_date is not used in the code visible here --
    # presumably needed further down; confirm before removing
    from common import to_date
    pars = OptionParser()
    pars.add_option('-u', '--username')
    pars.add_option('-p', '--password')
    pars.add_option('-d', '--date', help='dd-mm-yyyy')
    pars.add_option('', '--mutasi-file')
    pars.add_option('', '--saldo-file')
    pars.add_option('', '--output-file')
    option, remain = pars.parse_args(sys.argv[1:])
    # offline mode: parse a saved transaction (mutasi) page and exit
    if option.mutasi_file:
        content = open_file(option.mutasi_file)
        parser = MutasiParser()
        parser.feed(content)
        pprint(parser.get_clean_data())
        sys.exit()
    # offline mode: parse a saved balance (saldo) page and exit
    if option.saldo_file:
        content = open_file(option.saldo_file)
        parser = SaldoParser()
        parser.feed(content)
        pprint(parser.get_clean_data())
        sys.exit()
    # live mode requires credentials ("--username and --password required")
    if not option.username or not option.password:
        print('--username dan --password harus diisi')
        sys.exit()