Ejemplo n.º 1
0
def test_cut(seg, path, encoding='utf-8'):
    """Score a word segmenter against a reference corpus and print the results.

    Every non-blank line of the file is one sample. The raw sentence is
    rebuilt by deleting the ``/tag`` suffixes and the spaces from the line,
    then segmented with ``seg.cut``. The reference words are obtained by
    passing each whitespace-separated token of the line to
    ``utils.get_word_from_wordpos``.

    Per line, precision is the share of segmented words that also occur among
    the reference words, and recall is that same hit count divided by the
    number of reference words. Both are averaged over all scored lines and
    printed, together with a throughput figure computed as
    ``utils.get_filesize(path)`` divided by the elapsed seconds.

    Args:
        seg: Object exposing ``cut(text)`` returning the words of ``text``.
            Any iterable is accepted; it is materialised into a list here.
        path: Path of the corpus file.
        encoding: Text encoding used to read the corpus.

    Returns:
        None. The three figures are written to stdout.
    """
    precision_sum = 0
    recall_sum = 0
    cnt = 0
    time_start = time.time()

    # The context manager guarantees the corpus file is closed, even if the
    # segmenter raises half-way through.
    with open(path, encoding=encoding) as test_file:
        for row in test_file:
            if row.strip() == '':
                continue
            text = re.sub('(/[a-z,A-Z]+)* *', '', row).strip()
            # list() so that segmenters returning a generator can be counted.
            words_cut = list(seg.cut(text))
            words_norm = list(map(utils.get_word_from_wordpos, row.split()))
            words_occur_cnt = sum(1 for word in words_cut if word in words_norm)
            # A segmenter may return nothing for a line; count that line as
            # zero precision instead of dividing by zero.
            if words_cut:
                precision_sum += words_occur_cnt / len(words_cut)
            # words_norm cannot be empty: blank rows were skipped above, so
            # row.split() yields at least one token.
            recall_sum += words_occur_cnt / len(words_norm)
            cnt += 1

    # An empty (or all-blank) corpus has no lines to average over.
    precision_rate = precision_sum / cnt if cnt else 0.0
    recall_rate = recall_sum / cnt if cnt else 0.0

    elapsed = time.time() - time_start
    # Coarse clocks can report zero elapsed time for tiny inputs.
    if elapsed > 0:
        cutword_speed = utils.get_filesize(path) / elapsed
    else:
        cutword_speed = float('inf')

    print("精确率:", precision_rate)
    print("召回率:", recall_rate)
    print("分词速度:", cutword_speed, 'KB/s')
Ejemplo n.º 2
0
def home():
    """Render ``home.html`` with a listing of everything found in STORAGE.

    For each name returned by ``listdir(STORAGE)`` this gathers its
    ``get_md5`` code, its size and its creation time, orders the entries with
    ``sort_by_time`` and hands them to the template together with the entry
    count and the folder size.

    Side effect: rebinds the module-level ``code_map`` to a dict mapping each
    code to its file name.
    """
    global code_map

    names = listdir(STORAGE)
    paths = [sep.join([STORAGE, name]) for name in names]
    total = len(names)

    hashes = [get_md5(name) for name in names]
    sizes = [get_filesize(full) for full in paths]
    folder_size = get_foldersize(STORAGE)

    # Built before sorting; the mapping itself does not depend on the order.
    code_map = dict(zip(hashes, names))

    created = [get_file_create_time(full) for full in paths]
    hashes, names, sizes, created = sort_by_time(hashes, names, sizes, created)

    rows = zip(hashes, names, sizes, created)
    return render_template("home.html",
                           nfile=total,
                           memory=folder_size,
                           items=rows)
Ejemplo n.º 3
0
    def merge(self, file_id, index_md, content):
        """Build the metadata record for one indexed article.

        Values come from two places: the index entry ``index_md[file_id]``
        (missing fields default to an empty string) and the article's raw
        text ``content``, which is handed to this object's extraction helpers
        (``ccs``, ``authors``, ``abst``, ``kws``, ``cats``, ``gts``, ``ref``).

        Args:
            file_id: Key of the article in ``index_md``.
            index_md: Mapping from article id to that article's index entry.
            content: Raw text of the article.

        Returns:
            dict: The assembled metadata record.

        Raises:
            SystemExit: With status 1 when ``file_id`` is not in ``index_md``.
        """
        import sys

        # check that PDF file is indexed in index metadata
        if file_id not in index_md:
            print("Article ID \'{}\' not found in metadata.".format(file_id))
            # sys.exit() instead of the site-provided exit() helper, which is
            # meant for interactive sessions and may be missing; the non-zero
            # status tells the calling process that the run failed.
            sys.exit(1)

        entry = index_md[file_id]

        def get_data(field):
            # Value of ``field`` in the index entry, or "" when absent.
            return entry[field] if field in entry else ""

        # get input data file path
        file_path = os.path.join(params.get_path("articles", "input"),
                                 get_data("filename"))

        # extract ACM CCS concept metadata from raw text
        concepts_valid, concepts_invalid = self.ccs(content)

        metadata = {
            "id": file_id,
            "number": get_data("order"),
            "ref": file_id,
            "doi": get_data('doi'),
            "file_id": get_data("id"),
            "filename": get_data("filename"),
            "file_format": "application/pdf",
            "file_size": utils.get_filesize(file_path),
            "title": get_data("title"),
            "session": get_data("session"),
            "authors": self.authors(content, entry),
            "abstract": self.abst(content),
            "concepts": concepts_valid,
            # the concepts rejected by ccs() are passed on to keyword extraction
            "keywords": self.kws(content, concepts_invalid),
            "categories": self.cats(content),
            "general_terms": self.gts(content),
            "url": get_data("url"),
            "page_from": get_data("from"),
            "page_to": get_data("to"),
            "pages": get_data("pages"),
            "references": self.ref(content),
        }
        # self.validate(metadata)
        return metadata
Ejemplo n.º 4
0
async def ondevice_multisig_create(mode='p2wsh', addr_fmt=AF_P2WSH):
    # Build a multisig wallet from the xpub export files found on the SD card.
    #
    # collect all xpub- exports on current SD card (must be > 1)
    # - ask for M value
    # - create wallet, save and also export
    # - also create electrum skel to go with that
    # - only expected to work with our ccxp-foo.json export files.
    #
    # mode: key read from each export file (vals[mode] is the xpub and
    #       vals[mode + '_deriv'] its derivation path)
    # addr_fmt: address format handed to MultisigWallet
    #
    # Returns None on every path; on success a NewEnrollRequest is pushed
    # onto the UX stack.
    # NOTE(review): file_picker is imported here but not used below.
    from actions import file_picker
    import uos, ujson
    from utils import get_filesize
    from main import settings

    chain = chains.current_chain()
    my_xfp = settings.get('xfp')

    xpubs = []
    files = []
    has_mine = False
    deriv = None
    try:
        with CardSlot() as card:
            for path in card.get_paths():
                for fn, ftype, *var in uos.ilistdir(path):
                    if ftype == 0x4000:
                        # ignore subdirs
                        continue

                    if not fn.startswith('ccxp-') or not fn.endswith('.json'):
                        # wrong prefix/suffix: ignore
                        continue

                    full_fname = path + '/' + fn

                    # Consider file size
                    # sigh, OS/filesystem variations
                    # (presumably the listing carries the size as its last
                    # field on some ports; otherwise ask the filesystem)
                    file_size = var[1] if len(var) == 2 else get_filesize(
                        full_fname)

                    if not (0 <= file_size <= 1000):
                        # out of range size
                        continue

                    try:
                        with open(full_fname, 'rt') as fp:
                            vals = ujson.load(fp)

                        ln = vals.get(mode)

                        # value in file is BE32, but we want LE32 internally
                        xfp = str2xfp(vals['xfp'])
                        if not deriv:
                            # first usable file decides the derivation path
                            deriv = vals[mode + '_deriv']
                        else:
                            # every other file must agree; a mismatch is
                            # caught below and the file is skipped
                            assert deriv == vals[mode +
                                                 '_deriv'], "wrong derivation"

                        node, _, _ = import_xpub(ln)

                        if xfp == my_xfp:
                            has_mine = True

                        xpubs.append(
                            (xfp, chain.serialize_public(node, AF_P2SH)))
                        files.append(fn)

                    except CardMissingError:
                        raise

                    except Exception as exc:
                        # show something for coders, but no user feedback
                        sys.print_exception(exc)
                        continue

    except CardMissingError:
        await needs_microsd()
        return

    # remove dups; easy to happen if you double-tap the export
    # - keep the first copy of each (xfp, xpub) pair, in original order
    # - the previous pairwise scan marked BOTH copies for deletion (i=0,j=1
    #   removed 1, then i=1,j=0 removed 0), so a duplicated key vanished
    unique_xpubs = []
    for item in xpubs:
        if item not in unique_xpubs:
            unique_xpubs.append(item)
    xpubs = unique_xpubs

    if not xpubs or (len(xpubs) == 1 and has_mine):
        await ux_show_story(
            "Unable to find any Coldcard exported keys on this card. Must have filename: ccxp-....json"
        )
        return

    # add myself if not included already
    if not has_mine:
        with stash.SensitiveValues() as sv:
            node = sv.derive_path(deriv)
            xpubs.append((my_xfp, chain.serialize_public(node, AF_P2SH)))

    N = len(xpubs)

    if N > MAX_SIGNERS:
        await ux_show_story("Too many signers, max is %d." % MAX_SIGNERS)
        return

    # pick useful M value to start
    # - N-1 for small groups, simple majority otherwise
    assert N >= 2
    M = (N - 1) if N < 4 else ((N // 2) + 1)

    while 1:
        msg = '''How many need to sign?\n      %d of %d

Press (7 or 9) to change M value, or OK \
to continue.

If you expected more or less keys (N=%d #files=%d), \
then check card and file contents.

Coldcard multisig setup file and an Electrum wallet file will be created automatically.\
''' % (M, N, N, len(files))

        ch = await ux_show_story(msg, escape='123479')

        if ch in '1234':
            M = min(N, int(ch))  # undocumented shortcut
        elif ch == '9':
            M = min(N, M + 1)
        elif ch == '7':
            M = max(1, M - 1)
        elif ch == 'x':
            await ux_dramatic_pause('Aborted.', 2)
            return
        elif ch == 'y':
            break

    # create appropriate object
    assert 1 <= M <= N <= MAX_SIGNERS

    name = 'CC-%d-of-%d' % (M, N)
    # deriv[2:] drops the first two characters of the derivation path
    # (presumably a leading "m/" -- confirm against the export format)
    ms = MultisigWallet(name, (M, N),
                        xpubs,
                        chain_type=chain.ctype,
                        common_prefix=deriv[2:],
                        addr_fmt=addr_fmt)

    from auth import NewEnrollRequest, active_request

    # NOTE(review): this rebinds a function-local name only; it does not
    # update auth.active_request -- confirm that is intended.
    active_request = NewEnrollRequest(ms, auto_export=True)

    # menu item case: add to stack
    from ux import the_ux
    the_ux.push(active_request)
Ejemplo n.º 5
0
async def file_picker(msg,
                      suffix=None,
                      min_size=1,
                      max_size=1000000,
                      taster=None,
                      choices=None,
                      escape=None):
    # Let the user choose one file from the SD card; gives back "path/filename".
    # - msg: story shown before the menu; when None nothing is shown and the
    #   list of (label, path, filename) candidates is returned instead
    # - suffix: keep only files whose lowercased name ends with this
    # - min_size, max_size: inclusive bounds on the file size
    # - taster: optional callable given the full filename; a falsy result or
    #   an IOError rejects the file
    # - choices: ready-made candidate list; when given, the card is not searched
    # - escape: keys that skip the picking step; the pressed key is returned
    # Returns None when the card is missing, nothing suitable was found, the
    # user backs out with 'x', or the menu ends without a pick.
    from menu import MenuSystem, MenuItem
    import uos
    from utils import get_filesize

    if choices is None:
        choices = []
        try:
            with CardSlot() as card:
                used_labels = set()

                for folder in card.get_paths():
                    for name, kind, *extra in uos.ilistdir(folder):
                        # skip subdirs
                        if kind == 0x4000:
                            continue

                        # skip files with the wrong suffix
                        if suffix and not name.lower().endswith(suffix):
                            continue

                        # skip dot-files
                        if name[0] == '.':
                            continue

                        full_name = folder + '/' + name

                        # size is taken from the listing when it carries two
                        # extra fields, else looked up (OS/filesystem variations)
                        if len(extra) == 2:
                            size = extra[1]
                        else:
                            size = get_filesize(full_name)

                        if size < min_size or size > max_size:
                            continue

                        if taster is not None:
                            try:
                                accepted = taster(full_name)
                            except IOError:
                                accepted = False

                            if not accepted:
                                continue

                        # the bare file name may already be taken (e.g. same
                        # name under several paths): grow the label until it
                        # is unique
                        label = name
                        while label in used_labels:
                            label += folder.split('/')[-1] + '/' + name

                        used_labels.add(label)
                        choices.append((label, folder, name))

        except CardMissingError:
            # stay silent when only gathering data
            if msg is not None:
                await needs_microsd()
            return None

    if msg is None:
        return choices

    if not choices:
        sorry = 'Unable to find any suitable files for this operation. '

        if suffix:
            sorry += 'The filename must end in "%s". ' % suffix

        sorry += '\n\nMaybe insert (another) SD card and try again?'

        await ux_show_story(sorry)
        return

    # say how many files are on offer before showing the menu
    if len(choices) == 1:
        msg += '\n\nThere is only one file to pick from.'
    else:
        msg += '\n\nThere are %d files to pick from.' % len(choices)

    ch = await ux_show_story(msg, escape=escape)
    if escape and ch in escape:
        return ch
    if ch == 'x':
        return

    picked = []

    async def clicked(_1, _2, item):
        # remember the selection as "path/filename" and close the menu
        picked.append('/'.join(item.arg))
        the_ux.pop()

    items = []
    for label, folder, name in choices:
        items.append(MenuItem(label, f=clicked, arg=(folder, name)))

    if 0:
        # disabled on purpose: the count is already shown on the previous page
        if len(choices) == 1:
            # auto-picking the single file would be very confusing
            items.append(MenuItem('  (one file)', f=None))
        else:
            items.append(MenuItem('  (%d files)' % len(choices), f=None))

    menu = MenuSystem(items)
    the_ux.push(menu)

    await menu.interact()

    if picked:
        return picked[0]
    return None