def test_cut(seg, path, encoding='utf-8'):
    """Score a word segmenter against a POS-tagged corpus and print the results.

    Every non-blank row of the corpus is stripped of its ``/tag`` markers and
    spaces, re-segmented with ``seg.cut`` and compared with the reference
    words extracted from the row by ``utils.get_word_from_wordpos``.
    Per-row precision and recall are averaged over all scored rows.

    Args:
        seg: Object exposing ``cut(text)`` that returns the segmented words
            (any iterable is accepted).
        path: Path of the tagged corpus file.
        encoding: Text encoding used to read the corpus.

    Returns:
        None. Precision, recall and throughput are printed to stdout.
    """
    # Same pattern as before, compiled once instead of on every row.
    tag_pattern = re.compile('(/[a-z,A-Z]+)* *')

    precision_sum = 0
    recall_sum = 0
    row_count = 0

    # The context manager guarantees the corpus file is closed, even if the
    # segmenter raises part-way through.
    with open(path, encoding=encoding) as test_file:
        time_start = time.time()
        for row in test_file:
            if row.strip() == '':
                continue

            text = tag_pattern.sub('', row).strip()
            # Materialise the result so segmenters returning a generator
            # can still be counted.
            words_cut = list(seg.cut(text))
            words_norm = list(map(utils.get_word_from_wordpos, row.split()))

            words_cut_cnt = len(words_cut)
            words_norm_cnt = len(words_norm)
            words_occur_cnt = sum(1 for word in words_cut if word in words_norm)

            # A row with no predicted (or no reference) words contributes 0
            # instead of raising ZeroDivisionError.
            if words_cut_cnt:
                precision_sum += words_occur_cnt / words_cut_cnt
            if words_norm_cnt:
                recall_sum += words_occur_cnt / words_norm_cnt
            row_count += 1
        elapsed = time.time() - time_start

    # An empty corpus yields zero rates rather than a division by zero.
    precision_rate = precision_sum / row_count if row_count else 0.0
    recall_rate = recall_sum / row_count if row_count else 0.0

    # Coarse timers can report zero elapsed time for tiny files.
    file_size = utils.get_filesize(path)
    cutword_speed = file_size / elapsed if elapsed > 0 else float('inf')

    print("精确率:", precision_rate)
    print("召回率:", recall_rate)
    print("分词速度:", cutword_speed, 'KB/s')
def home():
    """Render ``home.html`` with a listing of the files found in STORAGE.

    Also refreshes the module-level ``code_map`` so that each file's MD5
    code maps back to its file name.

    Returns:
        The result of ``render_template`` for the home page.
    """
    global code_map

    names = listdir(STORAGE)
    total = len(names)
    # Full path of every entry, built once and reused below.
    full_paths = [sep.join([STORAGE, name]) for name in names]

    digests = [get_md5(name) for name in names]
    sizes = [get_filesize(p) for p in full_paths]
    used = get_foldersize(STORAGE)

    # The mapping is built from the unsorted lists, before reordering.
    code_map = dict(zip(digests, names))

    created = [get_file_create_time(p) for p in full_paths]
    digests, names, sizes, created = sort_by_time(digests, names, sizes, created)

    rows = zip(digests, names, sizes, created)
    return render_template("home.html", nfile=total, memory=used, items=rows)
def merge(self, file_id, index_md, content):
    """Build the metadata record for one article.

    Combines the fields stored for ``file_id`` in the index metadata with
    the values extracted from the article's raw text by this object's
    extraction methods.

    Args:
        file_id: Key of the article inside ``index_md``.
        index_md: Mapping from article id to its index metadata fields.
        content: Raw text passed to the extraction methods.

    Returns:
        dict: The merged metadata for the article.

    Note:
        If ``file_id`` is missing from ``index_md`` a message is printed and
        the process exits.
    """
    # The article must be listed in the index metadata.
    if file_id not in index_md:
        print("Article ID \'{}\' not found in metadata.".format(file_id))
        exit()

    def lookup(field):
        # Index field value, or an empty string when the field is absent.
        entry = index_md[file_id]
        if field in entry:
            return entry[field]
        return ""

    # Location of the input data file for this article.
    input_dir = params.get_path("articles", "input")
    file_path = os.path.join(input_dir, lookup("filename"))

    # ACM CCS concepts found in the raw text, split into valid and invalid.
    concepts_valid, concepts_invalid = self.ccs(content)

    metadata = {
        "id": file_id,
        "number": lookup("order"),
        "ref": file_id,
        "doi": lookup('doi'),
        "file_id": lookup("id"),
        "filename": lookup("filename"),
        "file_format": "application/pdf",
        "file_size": utils.get_filesize(file_path),
        "title": lookup("title"),
        "session": lookup("session"),
        "authors": self.authors(content, index_md[file_id]),
        "abstract": self.abst(content),
        "concepts": concepts_valid,
        # Invalid concepts are handed to the keyword extractor.
        "keywords": self.kws(content, concepts_invalid),
        "categories": self.cats(content),
        "general_terms": self.gts(content),
        "url": lookup("url"),
        "page_from": lookup("from"),
        "page_to": lookup("to"),
        "pages": lookup("pages"),
        "references": self.ref(content),
    }
    # Validation is currently disabled: self.validate(metadata)
    return metadata
async def ondevice_multisig_create(mode='p2wsh', addr_fmt=AF_P2WSH):
    """Build a multisig wallet from the ``ccxp-*.json`` exports on the card.

    Steps performed:
      - collect the xpub export files found on the current SD card
      - add this device's own key if it was not among them
      - ask the user for the M value
      - create the wallet object and push an enrollment request onto the UX
        stack (with ``auto_export=True``)

    Only expected to work with our ccxp-foo.json export files.

    Args:
        mode: Key inside each export file holding the xpub; the derivation
            path is read from ``mode + '_deriv'``.
        addr_fmt: Address format passed to ``MultisigWallet``.

    Returns:
        None. Returns early if the card is missing, no usable exports are
        found, there are too many signers, or the user aborts.
    """
    from actions import file_picker
    import uos, ujson
    from utils import get_filesize
    from main import settings

    chain = chains.current_chain()
    my_xfp = settings.get('xfp')

    xpubs = []
    files = []
    has_mine = False
    deriv = None
    try:
        with CardSlot() as card:
            for path in card.get_paths():
                for fn, ftype, *var in uos.ilistdir(path):
                    if ftype == 0x4000:
                        # ignore subdirs
                        continue

                    if not fn.startswith('ccxp-') or not fn.endswith('.json'):
                        # wrong prefix/suffix: ignore
                        continue

                    full_fname = path + '/' + fn

                    # Consider file size
                    # sigh, OS/filesystem variations: use the listing's extra
                    # field when it supplies two of them, otherwise ask the
                    # filesystem directly
                    file_size = var[1] if len(var) == 2 else get_filesize(full_fname)

                    if not (0 <= file_size <= 1000):
                        # out of range size
                        continue

                    try:
                        with open(full_fname, 'rt') as fp:
                            vals = ujson.load(fp)

                        ln = vals.get(mode)

                        # value in file is BE32, but we want LE32 internally
                        xfp = str2xfp(vals['xfp'])

                        # the first file read fixes the derivation; every
                        # later file must agree with it
                        if not deriv:
                            deriv = vals[mode + '_deriv']
                        else:
                            assert deriv == vals[mode + '_deriv'], "wrong derivation"

                        node, _, _ = import_xpub(ln)

                        if xfp == my_xfp:
                            has_mine = True

                        xpubs.append((xfp, chain.serialize_public(node, AF_P2SH)))
                        files.append(fn)

                    except CardMissingError:
                        raise

                    except Exception as exc:
                        # show something for coders, but no user feedback
                        sys.print_exception(exc)
                        continue

    except CardMissingError:
        await needs_microsd()
        return

    # remove dups; easy to happen if you double-tap the export
    # - keep the FIRST copy of each (xfp, xpub) pair, in original order; the
    #   previous pairwise scan marked both members of a duplicated pair and
    #   so dropped the key entirely
    seen = set()
    unique = []
    for entry in xpubs:
        if entry in seen:
            continue
        seen.add(entry)
        unique.append(entry)
    xpubs = unique

    if not xpubs or len(xpubs) == 1 and has_mine:
        await ux_show_story(
            "Unable to find any Coldcard exported keys on this card. Must have filename: ccxp-....json")
        return

    # add myself if not included already
    if not has_mine:
        with stash.SensitiveValues() as sv:
            node = sv.derive_path(deriv)
            xpubs.append((my_xfp, chain.serialize_public(node, AF_P2SH)))

    N = len(xpubs)

    if N > MAX_SIGNERS:
        await ux_show_story("Too many signers, max is %d." % MAX_SIGNERS)
        return

    # pick useful M value to start
    assert N >= 2
    M = (N - 1) if N < 4 else ((N // 2) + 1)

    while 1:
        msg = '''How many need to sign?\n %d of %d Press (7 or 9) to change M value, or OK \
to continue. If you expected more or less keys (N=%d #files=%d), \
then check card and file contents. Coldcard multisig setup file and an Electrum wallet file will be created automatically.\
''' % (M, N, N, len(files))

        ch = await ux_show_story(msg, escape='123479')

        if ch in '1234':
            M = min(N, int(ch))     # undocumented shortcut
        elif ch == '9':
            M = min(N, M + 1)
        elif ch == '7':
            M = max(1, M - 1)
        elif ch == 'x':
            await ux_dramatic_pause('Aborted.', 2)
            return
        elif ch == 'y':
            break

    # create appropriate object
    assert 1 <= M <= N <= MAX_SIGNERS

    name = 'CC-%d-of-%d' % (M, N)
    # common_prefix drops the first two characters of the derivation string
    ms = MultisigWallet(name, (M, N), xpubs, chain_type=chain.ctype,
                        common_prefix=deriv[2:], addr_fmt=addr_fmt)

    from auth import NewEnrollRequest, active_request

    # NOTE(review): this assignment rebinds only the local name imported
    # above; confirm whether the attribute in `auth` was meant to be set.
    active_request = NewEnrollRequest(ms, auto_export=True)

    # menu item case: add to stack
    from ux import the_ux
    the_ux.push(active_request)
async def file_picker(msg, suffix=None, min_size=1, max_size=1000000, taster=None, choices=None, escape=None):
    """Show a menu of candidate files on the card and return the chosen one.

    Args:
        msg: Story text shown before the menu. When None, nothing is shown
            and the list of candidates is returned instead.
        suffix: If given, only names whose lowercased form ends with it are
            considered.
        min_size: Smallest acceptable file size.
        max_size: Largest acceptable file size.
        taster: Optional callable taking the full file name; a falsy result
            or an IOError rejects the file.
        choices: Ready-made list of ``(label, path, fn)`` tuples; when
            provided, the card search is skipped.
        escape: Keys which, if pressed on the story screen, are returned
            immediately instead of picking a file.

    Returns:
        The candidate list when ``msg`` is None; the escape key pressed; the
        picked file as ``path/fn``; or None when the card is missing, no file
        qualifies, or the user backs out.
    """
    from menu import MenuSystem, MenuItem
    import uos
    from utils import get_filesize

    if choices is None:
        choices = []
        try:
            with CardSlot() as card:
                seen_labels = set()

                for folder in card.get_paths():
                    for name, kind, *extra in uos.ilistdir(folder):
                        # subdirectories are never candidates
                        if kind == 0x4000:
                            continue

                        # filter on the requested suffix
                        if suffix and not name.lower().endswith(suffix):
                            continue

                        # skip dot-files
                        if name[0] == '.':
                            continue

                        full_path = folder + '/' + name

                        # Size comes from the directory listing when it
                        # supplies two extra fields; otherwise ask the
                        # filesystem (OS/filesystem variations).
                        if len(extra) == 2:
                            size = extra[1]
                        else:
                            size = get_filesize(full_path)

                        if size < min_size or size > max_size:
                            continue

                        if taster is not None:
                            try:
                                accepted = taster(full_path)
                            except IOError:
                                accepted = False

                            if not accepted:
                                continue

                        # The bare file name may collide with an earlier
                        # entry (e.g. across multiple paths), so extend the
                        # label until it is unique.
                        label = name
                        while label in seen_labels:
                            label += folder.split('/')[-1] + '/' + name

                        seen_labels.add(label)
                        choices.append((label, folder, name))

        except CardMissingError:
            # stay silent when only gathering data
            if msg is not None:
                await needs_microsd()
            return None

    if msg is None:
        return choices

    if not choices:
        msg = 'Unable to find any suitable files for this operation. '

        if suffix:
            msg += 'The filename must end in "%s". ' % suffix

        msg += '\n\nMaybe insert (another) SD card and try again?'

        await ux_show_story(msg)
        return

    # Tell the user how many files there are; they can also quit from here.
    if len(choices) == 1:
        msg += '\n\nThere is only one file to pick from.'
    else:
        msg += '\n\nThere are %d files to pick from.' % len(choices)

    ch = await ux_show_story(msg, escape=escape)
    if escape and ch in escape:
        return ch
    if ch == 'x':
        return

    selection = []

    async def clicked(_1, _2, item):
        # Record the chosen file as "path/fn" and leave the menu.
        selection.append('/'.join(item.arg))
        the_ux.pop()

    entries = []
    for label, folder, name in choices:
        entries.append(MenuItem(label, f=clicked, arg=(folder, name)))

    if 0:
        # Disabled on purpose: the count is already shown on the previous
        # page, and auto-picking a lone file was found confusing.
        if len(choices) == 1:
            entries.append(MenuItem('  (one file)', f=None))
        else:
            entries.append(MenuItem('  (%d files)' % len(choices), f=None))

    menu = MenuSystem(entries)
    the_ux.push(menu)

    await menu.interact()

    if selection:
        return selection[0]
    return None