Example #1
def read():
	db = DataBase('not_bsd.dat')
	a = db['a']
	b = db['b']
	db.close()

	print('a', a)
	print('b', b)
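
The DataBase wrapper used above is project-specific and not shown; a minimal standalone sketch of the same write-then-read pattern using shelve.DbfilenameShelf directly (the file name and keys here are only illustrative):

import shelve

def write_then_read(path='example.dat'):
    # Create (or open) the shelf file and store two values.
    db = shelve.DbfilenameShelf(path)
    db['a'] = 1
    db['b'] = [2, 3]
    db.close()

    # Reopen read-only and fetch the values back.
    db = shelve.DbfilenameShelf(path, flag='r')
    a = db['a']
    b = db['b']
    db.close()

    print('a', a)
    print('b', b)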
Example #2
    def load_workers(self, filename):
        """
        Return a list of workers read from disk as [(id, started, assetid),...].

        """
        shelf = DbfilenameShelf(filename)
        try:
            workers = shelf['workers']
        except KeyError:
            workers = []
        shelf.close()
        return workers
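
Shelf objects are mutable mappings and support the context-manager protocol on Python 3.4+, so the bare except above can be narrowed; a hedged rewrite of the same idea, assuming Python 3:

from shelve import DbfilenameShelf

def load_workers(self, filename):
    """Return the stored worker list, or [] when no 'workers' key exists."""
    with DbfilenameShelf(filename) as shelf:
        return shelf.get('workers', [])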
Example #3
    def dump_workers(self, filename, workers):
        """
        Write a sequence of workers to disk as [(id, started, assetid), ...]
        and return the sequence.

        """
        seq = []
        for w in workers:
            seq.append((w['worker'], w['started'], w['args'][1]))
        shelf = DbfilenameShelf(filename)
        shelf['workers'] = seq 
        shelf.close()
        return seq
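
A self-contained round-trip sketch of the same dump/load pattern written as free functions (the file name and worker records below are placeholders, not taken from the original project):

from shelve import DbfilenameShelf

def dump_workers(filename, workers):
    # Reduce each worker dict to an (id, started, assetid) tuple and persist the list.
    seq = [(w['worker'], w['started'], w['args'][1]) for w in workers]
    with DbfilenameShelf(filename) as shelf:
        shelf['workers'] = seq
    return seq

def load_workers(filename):
    with DbfilenameShelf(filename) as shelf:
        return shelf.get('workers', [])

workers = [{'worker': 1, 'started': 1700000000.0, 'args': ('job', 'asset-42')}]
seq = dump_workers('workers.shelf', workers)
assert load_workers('workers.shelf') == seq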
Example #4
def update_shelf():
    url_mention = DbfilenameShelf(args.in_shelf, protocol=-1)
    TM = url_mention['__TOKEN_MAPPER__']
    TM.finalize(catpeople_baseline_nb_config.MAX_TOK)
    E = url_mention['__URL_LIST__']
    n_doc = 10000
    with rasengan.tictoc('Extracting Contexts'):
        df_obj = TextualClueObject(E[:n_doc], url_mention, TM)
    df = defaultdict(int)
    for features in df_obj.features.itervalues():
        for f in features:
            df[f] += 1
    for f in df.keys():
        df[f] = df[f] / float(n_doc)
    url_mention['__DF__'] = dict(df)
    url_mention.close()
    return
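
The function above depends on project-specific objects (rasengan, TextualClueObject, catpeople_baseline_nb_config) and Python 2 iteration (itervalues). The core document-frequency computation it performs can be sketched standalone in Python 3 as follows:

from collections import Counter

def document_frequency(per_doc_features, n_doc):
    # Count feature occurrences across documents, then normalise by the
    # number of documents, mirroring the df loop above.
    df = Counter()
    for features in per_doc_features:
        df.update(features)
    return {f: count / n_doc for f, count in df.items()}

# e.g. document_frequency([['x', 'y'], ['y']], n_doc=2) == {'x': 0.5, 'y': 1.0}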
Example #5
                    del seg[e_sent_tok_idx]
            else:
                # import pdb; pdb.set_trace()
                print >> sys.stderr, "Skipped mention", mention[1]
                continue
        if seg[e_sent][1] - seg[e_sent][0] > MAX_CHAR_IN_SENT:
            # The sentence that contains the entity is too long.
            print >> sys.stderr, '__noisy__', text[
                seg[e_sent][0]:seg[e_sent][1]]
            continue
        e_start = mention_start - seg[e_sent][0]
        # Remove too long sentences.
        sentences = [[tmp_tok.lower() for tmp_tok in PAT_TOKENIZER(text[a:b])]
                     for (a, b) in seg if b - a <= MAX_CHAR_IN_SENT]
        mapped_sentences = [TOKEN_MAPPER(e) for e in sentences]
        # Adjust pointer to the sentence that contains the entity since we
        # might have removed some extremely long sentences.
        # idx_to_sentence_that_contains_entity
        itste = (e_sent -
                 sum(1 for (a, b) in seg[:e_sent] if b - a > MAX_CHAR_IN_SENT))
        esidx, eeidx = tokens_in_tokenization_corresponding_to_a_span(
            text[seg[itste][0]:seg[itste][1]], e_start, e_start + len_mention,
            sentences[itste])
        out_mention = [mapped_sentences, itste, esidx, eeidx]
        out_mentions.append(out_mention)
        pass
    out_shelf[url] = deduplicate_unhashables(out_mentions)
out_shelf['__URL_LIST__'] = urls
out_shelf['__TOKEN_MAPPER__'] = TOKEN_MAPPER
out_shelf.close()
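
A hedged sketch of how a shelf written with this layout (per-URL mention lists plus the __URL_LIST__ and __TOKEN_MAPPER__ bookkeeping keys) might be read back; only the key names are taken from the snippet above:

from shelve import DbfilenameShelf

def iter_mentions(shelf_path):
    shelf = DbfilenameShelf(shelf_path, flag='r')
    try:
        token_mapper = shelf['__TOKEN_MAPPER__']
        for url in shelf['__URL_LIST__']:
            # Each stored mention is [mapped_sentences, itste, esidx, eeidx].
            for mention in shelf[url]:
                yield url, token_mapper, mention
    finally:
        shelf.close()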
Example #6
                                connection.commit()
                                session_store['authenticated'] = False
                                form = ''
                                message = '<p>Your account has been successfully deleted.</p>'
                            cursor.close()
                            connection.close()
                        else:
                            message = '<p><strong>Error! Passwords must match.</strong></p>'
                    else:
                        message = '<p><strong>Error! Please check the box to confirm deletion.</strong></p>'
                result = """<section>
                    <h2>Delete Account</h2>"""
                result += form
                result += message
                result += '</section>'
            session_store.close()
except (db.Error, IOError):
    result = '<p>Sorry! We are experiencing problems at the moment. Please call back later.</p>'

print("""
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <meta charset="utf-8" />
            <link rel="stylesheet" href="styles.css" />
            <meta name="viewport" content="initial-scale=1.0, width=device-width" />
            <title>Delete Account</title>
        </head>
        <body>
            <header>
                <h1>a-MAZE-ing</h1>
Example #7
    out_val.default_factory = None  # FINALIZE out_val
    return out_val


with rasengan.tictoc('Reading wikilinks'):
    # With joblib this takes only 8 minutes !!
    from joblib import Parallel, delayed
    out_val_list = Parallel(n_jobs=10)(
        delayed(get_mention_from_wikilink_thrift_file)(fn)
        for fn in range(1, args.last_f2r))
    # out_val_list = [get_mention_from_wikilink_thrift_file(fn)
    #                 for fn in range(1, args.last_f2r)]

with rasengan.tictoc('Shelving'):
    import shelve
    from shelve import DbfilenameShelf
    total_data = defaultdict(list)
    for out_val in out_val_list:
        for url in out_val:
            total_data[url].extend(out_val[url])
    total_data.default_factory = None  # FINALIZE total_data
    # Save the results of the processing.
    shelf = DbfilenameShelf(args.out_fn, protocol=-1)
    shelf['__URL_LIST__'] = total_data.keys()
    for url in shelf['__URL_LIST__']:
        shelf[url] = total_data[url]
    shelf.close()
    # Validation
    for e in POOL:
        assert e in total_data
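
The shelving block above is Python 2, where dict.keys() returns a plain list that pickles cleanly; under Python 3 the key view has to be materialised before it can be stored. A hedged sketch of the equivalent save step:

from shelve import DbfilenameShelf

def save_total_data(out_fn, total_data):
    shelf = DbfilenameShelf(out_fn, protocol=-1)
    try:
        url_list = list(total_data.keys())   # dict_keys objects are not picklable
        shelf['__URL_LIST__'] = url_list
        for url in url_list:
            shelf[url] = total_data[url]
    finally:
        shelf.close()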