def _read_file(self, filename: str, password: str = '') -> tuple:
    """ Reads the data from filename and returns a tuple
    (crypt_data, accounts_dict): crypt_data is a CryptData built from
    the password and the stored encrypted master key (empty bytes when
    the file held no key), and accounts_dict is the account dictionary
    with the master-key entry removed.

    The return annotation was corrected from `dict` to `tuple`.
    """
    # Read from the file if it exists; a missing file means no data.
    # NOTE(review): pathlib_path looks like pathlib.Path, whose
    # context-manager support was removed in Python 3.13, so the old
    # `with` statement is replaced with a plain assignment.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = '{}'

    accounts_dict = json_loads(json_data)

    # Pop the master key out of the accounts dictionary so it won't be
    # operated on or listed.  A missing entry yields b''.
    encrypted_key = bytes.fromhex(accounts_dict.pop(self.MASTER_KEY_DIGEST, ''))

    # Only prompt when no password was supplied.  Previously an
    # explicitly passed password was ignored (re-prompted) whenever an
    # encrypted key already existed.
    if not password:
        if not encrypted_key:
            # Get (and verify) the password that will encrypt a new
            # master key.
            password = self._ask_pass('password')
        else:
            # Get the password to decrypt the existing key.
            password = self._ask_pass('password', verify=False)

    return CryptData(password, encrypted_key), accounts_dict
def read_file(filename: str, password: str = "") -> tuple:
    """ Reads the data from filename and returns the account dictionary,
    the encrypted master key, and the decrypted master key.
    """
    # Read from the file if it exists; a missing file means no data.
    # NOTE(review): pathlib_path looks like pathlib.Path, whose
    # context-manager support was removed in Python 3.13, so the old
    # `with` statement is replaced with a plain assignment.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b""

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = "{}"

    accounts_dict = json_loads(json_data)

    # Pop the master key out of the accounts dictionary so it won't be
    # operated on or listed; get_master_key handles creating one when
    # the stored key is missing (empty bytes).
    encrypted_key = bytes.fromhex(accounts_dict.pop(MASTER_KEY_DIGEST, ""))
    encrypted_key, master_key = get_master_key(encrypted_key, password)

    return accounts_dict, encrypted_key, master_key
def write_file(filename: str, accounts_dict: dict, encrypted_key: bytes):
    """ Compresses and writes the accounts_dict to the file at filename.

    The encrypted master key is stored under MASTER_KEY_DIGEST inside
    the written JSON.  The caller's dictionary is left unmodified.
    """
    # Work on a shallow copy so the master key is not inserted into the
    # caller's dictionary as a side effect (the old code mutated the
    # accounts_dict argument in place).
    out_dict = dict(accounts_dict)
    out_dict[MASTER_KEY_DIGEST] = encrypted_key.hex()

    json_data = json_dumps(out_dict)
    lzma_data = lzma_compress(json_data.encode())

    # NOTE(review): pathlib_path looks like pathlib.Path, whose
    # context-manager support was removed in Python 3.13, so write
    # directly instead of using a `with` block.
    pathlib_path(filename).write_bytes(lzma_data)
def read_file(filename: str) -> dict:
    """ Reads the LZMA-compressed JSON file at filename and returns the
    account dictionary.  A missing or empty file yields an empty dict.
    """
    # Read from the file if it exists; a missing file means no data.
    # NOTE(review): pathlib_path looks like pathlib.Path, whose
    # context-manager support was removed in Python 3.13, so the old
    # `with` statement is replaced with a plain assignment.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = '{}'

    # Load the json data into a dictionary.
    return json_loads(json_data)
def read_file(filename: str, password: str = '') -> tuple:
    """ Reads the data from filename and returns the account dictionary,
    the encrypted master key, and the decrypted master key.  When the
    file holds no master key, a fresh one is generated and encrypted.
    """
    # Read from the file if it exists; a missing file means no data.
    # NOTE(review): pathlib_path looks like pathlib.Path, whose
    # context-manager support was removed in Python 3.13, so the old
    # `with` statement is replaced with a plain assignment.
    pass_file = pathlib_path(filename)
    lzma_data = pass_file.read_bytes() if pass_file.is_file() else b''

    # Get the json data out of the file data, or an empty json dict if
    # the file was empty.
    if lzma_data:
        json_data = lzma_decompress(lzma_data).decode()
    else:
        json_data = '{}'

    accounts_dict = json_loads(json_data)

    # Pop the master key out of the accounts dictionary so it won't be
    # operated on or listed.  Also if no master key is found, create one.
    encrypted_key = bytes.fromhex(accounts_dict.pop(MASTER_KEY_DIGEST, ''))

    if not encrypted_key:
        if not password:
            # Get (and verify) the password to encrypt the master key.
            password = get_pass('password')
        # Generate the largest key possible, then encrypt it.
        master_key = Random.new().read(KEY_LEN)
        encrypted_key = encrypt_key(master_key, password)
    else:
        if not password:
            # Get the password to decrypt the key.  Previously this
            # prompted even when a password argument was supplied.
            password = get_pass('password', verify=False)
        master_key = decrypt_key(encrypted_key, password)

    return accounts_dict, encrypted_key, master_key
def __init__(self, n_paired: int):
    """ Load (or build and cache) the MSCOCO image/caption dataset.

    Images are resized to 48x64, forced to 3 channels, and flattened to
    row vectors; captions are tokenized, rare words are (mostly) mapped
    to <OOV>, and words are converted to vocabulary ids.  Both results
    are cached as pickles under ../data/mscoco/.  Finally the training
    image ids are split into three disjoint groups: paired (n_paired
    ids), image_only, and caption_only.

    NOTE(review): this block was recovered from whitespace-mangled
    source; the indentation of a few statements (marked below) was
    reconstructed from context -- confirm against the original file.
    """
    # Fixed locations of the raw MSCOCO data and the two pickle caches.
    _train_annotations_path = '../data/mscoco/annotations/captions_train2014.json'
    _val_annotations_path = '../data/mscoco/annotations/captions_val2014.json'
    _train_images_dir = '../data/mscoco/train2014/'
    _val_images_dir = '../data/mscoco/val2014/'
    _caption_path = '../data/mscoco/captions.pickle'
    _image_path = '../data/mscoco/images.pickle'

    # Special vocabulary symbols.
    self._padding = '<PAD>'
    self._oov = '<OOV>'
    self._go = '<GO>'
    self._eof = '<EOF>'
    self._symbols = [self._oov, self._padding, self._eof, self._go]

    # Built lazily elsewhere (id -> word mapping).
    self._inverse_vocab = None

    # (annotation file, image dir) pairs: index 0 = training split,
    # index 1 = validation split.
    paths = [(_train_annotations_path, _train_images_dir),
             (_val_annotations_path, _val_images_dir)]

    if os.path.isfile(_image_path):
        # Image cache exists: load both splits from the pickle.
        print("Loading images...", flush=True)
        with open(_image_path, 'rb') as ff:
            data = pickle.load(ff)
        self._images = data['images']
        self._val_images = data['val_images']
        print("Images loaded.", flush=True)
    else:
        # Build the image dictionaries from the raw files, then cache.
        for j, (ann_p, im_p) in enumerate(paths):
            with open(ann_p) as ff:
                ann = json.load(ff)
            print("Creating image dictionary..", flush=True)
            images = dict()  # key,value ---> image_id, image array
            for k in ann['images']:
                file_path = im_p + k['file_name']
                im_file = pathlib_path(file_path)
                if im_file.exists():
                    image = ndimage.imread(file_path)
                    # Downsample to 48x64 with cubic interpolation.
                    image = imresize(image, size=(48, 64), interp='cubic')
                    if image.shape == (48, 64):
                        # Grayscale: replicate the single channel to RGB.
                        image = np.expand_dims(image, axis=2)
                        image = np.concatenate((image, image, image), axis=2)
                    # Flatten to a single row vector and store.
                    # NOTE(review): reconstructed indentation -- assumed
                    # to apply to every image, not only grayscale ones.
                    image = np.reshape(image, newshape=[1, -1])
                    images[k['id']] = image
            if j == 0:  # training set
                self._images = images
            else:  # validation set
                self._val_images = images
        tosave = dict()
        tosave['images'] = self._images
        tosave['val_images'] = self._val_images
        print("Saving images...", flush=True)
        with open(_image_path, 'wb') as ff:
            pickle.dump(tosave, ff, pickle.HIGHEST_PROTOCOL)
        print("Saved.", flush=True)

    if os.path.isfile(_caption_path):  # load processed data
        print("Loading data...", flush=True)
        with open(_caption_path, 'rb') as ff:
            data = pickle.load(ff)
        self._vocab = data['vocab']
        self._captions = data['captions']
        self._imcapt = data['imcapt']
        self._val_captions = data['val_captions']
        self._val_imcapt = data['val_imcapt']
        self._max_seq_len = data['max_seq_len']
        print("Data loaded.", flush=True)
    else:  # process data and load
        print("Processing data..", flush=True)
        self._max_seq_len = 1
        for j, (ann_p, im_p) in enumerate(paths):
            with open(ann_p) as ff:
                ann = json.load(ff)
            print("Creating caption dictionary..", flush=True)
            captions = dict()  # key,value ---> caption_id, word sequence
            for k in ann['annotations']:
                capt = k['caption']
                # caption preprocessing
                capt = capt.strip()              # remove unnecessary whitespace
                capt = capt.lower()              # make lower case
                capt = capt.replace('.', ' ')    # remove periods
                capt = capt.replace(',', ' ')    # remove commas
                capt = capt.replace('?', ' ')    # remove question marks
                capt = capt.replace('-', ' ')    # remove dashes
                capt = capt.replace('"', ' " ')  # expand double quotes
                capt = capt.replace('(', ' ( ')  # expand brackets
                capt = capt.replace(')', ' ) ')  # expand brackets
                capt = capt.replace('{', ' { ')  # expand brackets
                capt = capt.replace('}', ' } ')  # expand brackets
                capt = capt.split()              # split string
                capt.append(self._eof)           # pad with EOF character
                captions[k['id']] = capt
            # Track the longest caption seen across both splits.
            self._max_seq_len = max(max([len(_v) for _, _v in captions.items()]), self._max_seq_len)
            print("Max sequence length: ", self._max_seq_len, flush=True)
            if j == 0:  # training set
                # Count word frequencies over the training captions.
                print("Word frequencies", flush=True)
                freqs = defaultdict(int)
                for _, capt in captions.items():
                    for word in capt:
                        freqs[word] += 1
                print("Adding <OOV> words", flush=True)
                min_freq = 2  # minimum word frequency
                for k, capt in captions.items():
                    for i, w in enumerate(capt):
                        if freqs[w] < min_freq:
                            if np.random.binomial(n=1, p=0.9) == 1:  # 90% chance of setting <OOV>
                                capt[i] = self._oov
            print("Creating vocabulary..", flush=True)
            if j > 0:  # validation set
                # Reuse the training vocabulary for validation captions.
                vocab = self._vocab
            else:
                vocab = dict()  # key,value ---> word, word_id
                # NOTE(review): set iteration order is not stable across
                # runs, so word ids differ between cache rebuilds.
                words = {w for _, _v in captions.items() for w in _v}
                for i, w in enumerate(words):
                    vocab[w] = i
            # Add symbols to vocab dictionary if not already there.
            # NOTE(review): reconstructed indentation -- assumed to run
            # for both splits (idempotent, so placement is behavior-safe).
            for s in self._symbols:
                if s not in vocab:
                    idx = max([v for k, v in vocab.items()]) + 1
                    vocab[s] = idx
            print("Converting captions to ids (from vocab)..", flush=True)
            for _k, _v in captions.items():
                for i in range(len(_v)):
                    if _v[i] in vocab:
                        _v[i] = vocab[_v[i]]
                    else:
                        # Word absent from the (training) vocabulary.
                        _v[i] = vocab[self._oov]
            print("Creating image-caption mapping..", flush=True)
            im_capt = defaultdict(set)  # key,value ---> image_id, set of caption ids
            for k in ann['annotations']:
                im_capt[k['image_id']].add(k['id'])
            if j == 0:  # training set
                self._captions = captions
                self._vocab = vocab
                self._imcapt = im_capt
            else:  # validation set
                self._val_captions = captions
                self._vocab = vocab  # same object; training vocab was reused
                self._val_imcapt = im_capt
        # Cache everything for the next run.
        tosave = dict()
        tosave['vocab'] = self._vocab
        tosave['captions'] = self._captions
        tosave['imcapt'] = self._imcapt
        tosave['val_captions'] = self._val_captions
        tosave['val_imcapt'] = self._val_imcapt
        tosave['max_seq_len'] = self._max_seq_len
        print("Saving data...", flush=True)
        with open(_caption_path, 'wb') as ff:
            pickle.dump(tosave, ff, pickle.HIGHEST_PROTOCOL)
        print("Saved.", flush=True)

    # lists of image ids
    self.image_ids = list(self._images.keys())
    self.val_image_ids = list(self._val_images.keys())

    # construct pairings: sample n_paired ids that keep both modalities,
    # then split the remainder roughly in half between image-only and
    # caption-only groups.
    _n = len(self.image_ids)  # NOTE(review): unused; kept as-is
    self.paired = set(np.random.choice(self.image_ids, size=n_paired, replace=False))
    _remain = set(self.image_ids) - self.paired
    _each_size = len(_remain) // 2
    self.image_only = set(np.random.choice(list(_remain), size=_each_size, replace=False))
    self.caption_only = _remain - self.image_only
    self.paired = list(self.paired)
    self.image_only = list(self.image_only)
    self.caption_only = list(self.caption_only)