def read():
    db = DataBase('not_bsd.dat')
    a = db['a']
    b = db['b']
    db.close()
    print('a', a)
    print('b', b)
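# Hedged sketch (not from the original source): a write() counterpart to read()
# above, assuming DataBase exposes the same dict-style mapping interface plus
# close(). The stored values below are illustrative only.
def write():
    db = DataBase('not_bsd.dat')
    db['a'] = 'example value for a'   # hypothetical record
    db['b'] = 'example value for b'   # hypothetical record
    db.close()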
from shelve import DbfilenameShelf


def load_workers(self, filename):
    """
    Return a list of workers read from disk as [(id, started, assetid), ...].
    """
    shelf = DbfilenameShelf(filename)
    try:
        workers = shelf['workers']
    except KeyError:
        # No workers have been saved yet.
        workers = []
    shelf.close()
    return workers
def dump_workers(self, filename, workers):
    """
    Write a sequence of workers to disk as [(id, started, assetid), ...],
    and then return the sequence that was written.
    """
    seq = []
    for w in workers:
        seq.append((w['worker'], w['started'], w['args'][1]))
    shelf = DbfilenameShelf(filename)
    shelf['workers'] = seq
    shelf.close()
    return seq
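# Hedged usage sketch (not from the original source): round-trip two example
# worker records through dump_workers() and load_workers() above. Neither method
# actually uses self, so None is passed in its place here; the 'workers.shelf'
# path and the example worker dicts are assumptions made for this illustration.
import time

example_workers = [
    {'worker': 1, 'started': time.time(), 'args': ('task', 'asset-1')},
    {'worker': 2, 'started': time.time(), 'args': ('task', 'asset-2')},
]
saved = dump_workers(None, 'workers.shelf', example_workers)
assert load_workers(None, 'workers.shelf') == saved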
def update_shelf():
    url_mention = DbfilenameShelf(args.in_shelf, protocol=-1)
    TM = url_mention['__TOKEN_MAPPER__']
    TM.finalize(catpeople_baseline_nb_config.MAX_TOK)
    E = url_mention['__URL_LIST__']
    n_doc = 10000
    with rasengan.tictoc('Extracting Contexts'):
        df_obj = TextualClueObject(E[:n_doc], url_mention, TM)
    # Count the number of documents that each feature occurs in.
    df = defaultdict(int)
    for features in df_obj.features.itervalues():
        for f in features:
            df[f] += 1
    # Normalize the counts into document frequencies.
    for f in df.keys():
        df[f] = df[f] / float(n_doc)
    url_mention['__DF__'] = dict(df)
    url_mention.close()
    return
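# Hedged sketch (not part of the original script): read back the document
# frequencies that update_shelf() stored under '__DF__'. The 'catpeople.shelf'
# path and the 0.01 cut-off are assumptions made for this example.
from shelve import DbfilenameShelf

def high_df_features(shelf_path='catpeople.shelf', min_df=0.01):
    # Return the features that occur in at least min_df of the sampled documents.
    shelf = DbfilenameShelf(shelf_path, protocol=-1)
    df = shelf['__DF__']
    shelf.close()
    return [f for f, v in df.items() if v >= min_df]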
            del seg[e_sent_tok_idx]
        else:
            # import pdb; pdb.set_trace()
            print >> sys.stderr, "Skipped mention", mention[1]
            continue
        if seg[e_sent][1] - seg[e_sent][0] > MAX_CHAR_IN_SENT:
            # The sentence that contains the entity is too long.
            print >> sys.stderr, '__noisy__', text[seg[e_sent][0]:seg[e_sent][1]]
            continue
        e_start = mention_start - seg[e_sent][0]
        # Drop sentences that are too long.
        sentences = [[tmp_tok.lower() for tmp_tok in PAT_TOKENIZER(text[a:b])]
                     for (a, b) in seg
                     if b - a <= MAX_CHAR_IN_SENT]
        mapped_sentences = [TOKEN_MAPPER(e) for e in sentences]
        # Adjust the pointer to the sentence that contains the entity, since we
        # might have removed some extremely long sentences.
        # itste = idx_to_sentence_that_contains_entity
        itste = (e_sent - sum(1 for (a, b) in seg[:e_sent]
                              if b - a > MAX_CHAR_IN_SENT))
        esidx, eeidx = tokens_in_tokenization_corresponding_to_a_span(
            text[seg[itste][0]:seg[itste][1]],
            e_start,
            e_start + len_mention,
            sentences[itste])
        out_mention = [mapped_sentences, itste, esidx, eeidx]
        out_mentions.append(out_mention)
    out_shelf[url] = deduplicate_unhashables(out_mentions)
out_shelf['__URL_LIST__'] = urls
out_shelf['__TOKEN_MAPPER__'] = TOKEN_MAPPER
out_shelf.close()
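# Hedged sketch (not part of the original pipeline): iterate over the mentions
# written to the output shelf above. The 'out.shelf' path is an assumption;
# '__URL_LIST__' and the [mapped_sentences, itste, esidx, eeidx] record layout
# come from the writing code above.
from shelve import DbfilenameShelf

def iterate_mentions(shelf_path='out.shelf'):
    shelf = DbfilenameShelf(shelf_path, protocol=-1)
    for url in shelf['__URL_LIST__']:
        for (mapped_sentences, itste, esidx, eeidx) in shelf[url]:
            # itste points at the sentence that contains the entity; esidx and
            # eeidx delimit the entity's token span inside that sentence.
            yield url, mapped_sentences[itste], (esidx, eeidx)
    shelf.close()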
            connection.commit()
            session_store['authenticated'] = False
            form = ''
            message = '<p>Your account has been successfully deleted.</p>'
            cursor.close()
            connection.close()
        else:
            message = '<p><strong>Error! Passwords must match.</strong></p>'
    else:
        message = '<p><strong>Error! Please check the box to confirm deletion.</strong></p>'
    result = """<section>
        <h2>Delete Account</h2>"""
    result += form
    result += message
    result += '</section>'
    session_store.close()
except (db.Error, IOError):
    result = '<p>Sorry! We are experiencing problems at the moment. Please call back later.</p>'

print("""
    <!DOCTYPE html>
    <html lang="en">
        <head>
            <meta charset="utf-8" />
            <link rel="stylesheet" href="styles.css" />
            <meta name="viewport" content="initial-scale=1.0, width=device-width" />
            <title>Delete Account</title>
        </head>
        <body>
            <header>
                <h1>a-MAZE-ing</h1>
    out_val.default_factory = None  # FINALIZE out_val
    return out_val


with rasengan.tictoc('Reading wikilinks'):
    # With joblib this takes only 8 minutes !!
    from joblib import Parallel, delayed
    out_val_list = Parallel(n_jobs=10)(
        delayed(get_mention_from_wikilink_thrift_file)(fn)
        for fn in range(1, args.last_f2r))
    # out_val_list = [get_mention_from_wikilink_thrift_file(fn)
    #                 for fn in range(1, args.last_f2r)]

with rasengan.tictoc('Shelving'):
    import shelve
    from shelve import DbfilenameShelf
    total_data = defaultdict(list)
    for out_val in out_val_list:
        for url in out_val:
            total_data[url].extend(out_val[url])
    total_data.default_factory = None  # FINALIZE total_data
    # Save the results of the processing.
    shelf = DbfilenameShelf(args.out_fn, protocol=-1)
    shelf['__URL_LIST__'] = total_data.keys()
    for url in shelf['__URL_LIST__']:
        shelf[url] = total_data[url]
    shelf.close()

# Validation
for e in POOL:
    assert e in total_data
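# Hedged follow-up sketch (not part of the original script): spot-check that the
# shelf written above reads back and matches the in-memory total_data dictionary.
# The sample size of 100 URLs is an assumption made for this example.
def verify_shelf(out_fn, total_data, sample=100):
    shelf = DbfilenameShelf(out_fn, protocol=-1)
    urls = shelf['__URL_LIST__']
    assert sorted(urls) == sorted(total_data.keys())
    for url in urls[:sample]:
        assert shelf[url] == total_data[url]
    shelf.close()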