def construct_subhash_vectors(fname, dup_map):
    """Collect the set of checksums per file, substituting numeric ids
    (fno, hno) for text values.

    The listing in ``fname`` is read line by line. Each line is parsed with
    ``parse_md5deep_subfile_entry`` into a ``(checksum, name)`` pair, and
    consecutive lines that share the same name are grouped together. Each
    group is handed to ``construct_vector``; every truthy result is kept.

    Side effects: ``FnameMap`` and ``ChecksumMap`` are reset before reading.

    Args:
        fname: Path of the listing file to read.
        dup_map: Passed through unchanged to ``construct_vector``.

    Returns:
        A list of the truthy vectors returned by ``construct_vector``, in the
        order their groups appear in the file. An empty file yields ``[]``.
    """
    result = []
    FnameMap.reset()  # initialize mapping tables
    ChecksumMap.reset()

    last_name = ""
    hash_set = []
    # The context manager closes the file even if parsing or vector
    # construction raises part-way through.
    with open(fname) as fd:
        for text in fd:
            val, name = parse_md5deep_subfile_entry(text)
            if name != last_name:
                # Name changed: flush the group collected so far.
                # NOTE(review): for the very first entry this flushes an
                # empty group under the name "" -- kept as-is because
                # construct_vector may touch the mapping tables; confirm
                # whether that call can be skipped.
                vec = construct_vector(last_name, hash_set, dup_map)
                if vec:
                    result.append(vec)
                last_name = name
                hash_set = []
            hash_set.append(val)

    # Flush the final group. hash_set is non-empty exactly when at least one
    # entry was read, so an empty file returns [] instead of failing on an
    # unbound name. After the loop last_name always equals the last name seen.
    if hash_set:
        vec = construct_vector(last_name, hash_set, dup_map)
        if vec:
            result.append(vec)
    return result