def create_haystack(pid, gid, files):
    """Verify every source file, then create the haystack for a group.

    Args:
        pid: partition id; it is concatenated into the switch-version URL,
            so it has to be a string.
        gid: group id; converted with ``str()`` when building the URL.
        files: mapping of file name -> file path.  The part of each name
            before the first ``'-'`` is passed on as the file's checksum.

    Raises:
        group.FileDamaged: when a file fails the consistency check.
    """
    # NOTE(review): another ``create_haystack`` is defined later in this
    # file; at import time that later definition replaces this one.
    for name, path in files.items():
        expected = name.split('-', 1)[0]
        # Consistency check of the file against the checksum in its name.
        try:
            s2recovery_util.check_file_consistent(path, expected)
        except s2recovery_util.FileDamaged as e:
            logger.info(repr(e) + ' ' + path)
            raise group.FileDamaged(('pid', pid), ('gid', gid), ('fn', name))

    # Build the needle list.  Its format is as follows:
    # [{'file_names': ['069e6fabca59e1ba3db034950b88aa074708cf30-99532'],
    #   'data_parts': [{'path':
    #     '/data6/064247da00014006a370842b2b0c0467/g553698/040/069e6fabca59e1ba3db034950b88aa074708cf30-99532',
    #     'size': 229, 'offset': 0}],
    #   'key': '\x06\x9eo\xab\xcaY\xe1\xba=\xb04\x95\x0b\x88\xaa\x07G\x08\xcf0'},
    needle_list = create_needle_list(files)

    haystack_dir = get_haystack_path(pid, gid)
    # Create the haystack directory.
    _mkdir(haystack_dir)

    switch_port = partition.pid_port(pid)
    store = haystack.Haystack(haystack_dir)
    switch_url = SWITCH_VERSION_PREFIX + pid + '/' + str(gid)

    try:
        store.create(
            needle_list,
            chunk_num=CHUNK_NUM,
            switch_version_port=switch_port,
            switch_version_url=switch_url,
        )
    except haystack_version.VersionExisted:
        # An already existing version is only logged, not treated as an error.
        logger.info('haystack %s %s current version is existed ' % (pid, gid))
def recovery_by_haystack(pid, gid):
    """Replace the local haystack of a group with a downloaded copy.

    The downloaded files are moved into their versioned locations and the
    version number is reset to the downloaded one.  If any of that fails,
    both the already moved files and the downloaded files are deleted and
    the exception is re-raised.  Afterwards the version is switched and the
    previous version is deleted when its number differs from the new one.

    Args:
        pid: partition id (string; concatenated into the switch URL).
        gid: group id.
    """
    store = haystack.Haystack(get_haystack_path(pid, gid))
    previous_num = store.get_latest_version_num()
    ver_num, downloaded = download_haystack(pid, gid)

    moved = []
    try:
        # The sha1 is taken before the attribute file is moved away.
        attr_sha1 = haystack_version.get_file_sha1(downloaded['attribute'])

        for key, src in downloaded.items():
            dst = get_haystack_file_path(pid, gid, ver_num, key)
            rename_force(src, dst)
            moved.append(dst)

        store.reset_version_num(ver_num, attr_sha1)
    except Exception:
        # Roll back: remove whatever was moved plus the download leftovers.
        delete_files(moved)
        delete_files(downloaded.values())
        raise

    port = partition.pid_port(pid)
    url = SWITCH_VERSION_PREFIX + pid + '/' + str(gid)
    store._switch_version(port, url)

    # NOTE(review): when there was no previous version, previous_num is
    # presumably None and still reaches delete_version - confirm that
    # delete_version tolerates it.
    if previous_num != ver_num:
        store.delete_version(previous_num, port, url)
def create_haystack(pid, gid, files):
    """Check all files of a group and create its haystack from them.

    Args:
        pid: partition id; must be a string because it is concatenated
            into the switch-version URL.
        gid: group id; stringified for the URL.
        files: mapping of file name -> file path; the text before the first
            ``'-'`` of a name is used as the expected checksum.

    Raises:
        group.FileDamaged: if a file does not pass the consistency check.
    """
    for fn, fpath in files.items():
        # Everything in front of the first '-' is the checksum.
        checksum = fn.partition('-')[0]
        try:
            s2recovery_util.check_file_consistent(fpath, checksum)
        except s2recovery_util.FileDamaged as err:
            logger.info(repr(err) + ' ' + fpath)
            raise group.FileDamaged(('pid', pid), ('gid', gid), ('fn', fn))

    needles = create_needle_list(files)

    target_dir = get_haystack_path(pid, gid)
    _mkdir(target_dir)

    port = partition.pid_port(pid)
    hs = haystack.Haystack(target_dir)

    try:
        hs.create(needles,
                  chunk_num=CHUNK_NUM,
                  switch_version_port=port,
                  switch_version_url=SWITCH_VERSION_PREFIX + pid + '/' + str(gid))
    except haystack_version.VersionExisted:
        # The version being there already is logged and otherwise ignored.
        logger.info('haystack %s %s current version is existed ' % (pid, gid))
def clear_haystack_original_fns(pid, gid):
    """Empty the 'original_file_names' file of the latest haystack version.

    Does nothing when the haystack reports no latest version (``None``).
    """
    store = haystack.Haystack(get_haystack_path(pid, gid))
    latest = store.get_latest_version_num()
    if latest is None:
        return

    target = get_haystack_file_path(pid, gid, latest, 'original_file_names')
    empty_file(target)
def check_haystack(pid, gid):
    """Run the global-version check and the file check for one haystack.

    ``gid`` is converted to ``int`` first.  When the haystack does not
    exist, the function returns without checking or logging anything.
    """
    gid = int(gid)

    store = haystack.Haystack(get_haystack_path(pid, gid))
    if not store.exist():
        return

    ids = (pid, gid)
    logger.info('haystack_check: pid:%s, gid:%d haystack begin checking !' % ids)

    check_global_version_file(pid, gid)
    check_haystack_files(pid, gid)

    logger.info('haystack_check: pid:%s, gid:%d haystack finish checking !' % ids)
def check_global_version_file(pid, gid):
    """Repair the global version file of a haystack if it is not ok.

    A healthy file is left alone.  Otherwise the file is emptied and rebuilt
    from the latest attribute file; when that is not possible the whole
    haystack goes through ``recovery_haystack``.
    """
    if is_global_version_file_ok(pid, gid):
        return

    logger.info('pid: %s, gid:%s global version file damaged!' % (pid, gid))

    store = haystack.Haystack(get_haystack_path(pid, gid))
    empty_file(store.global_version)

    rebuilt = recovery_global_version_by_attr_file(pid, gid)
    if not rebuilt:
        # The attribute file could not be used; fall back to full recovery.
        recovery_haystack(pid, gid)
def is_global_version_file_ok(pid, gid):
    """Tell whether the global version file of a haystack is usable.

    The file counts as ok when the latest version number can be read from
    it, is not ``None``, and the attribute file of that version exists on
    disk.

    Args:
        pid: partition id.
        gid: group id.

    Returns:
        True when the checks above pass, False otherwise.  ``OSError`` and
        ``IOError`` raised while reading the version number are logged and
        reported as False.
    """
    hpath = get_haystack_path(pid, gid)
    h = haystack.Haystack(hpath)

    # Only the read itself is guarded, so the handler cannot hide errors
    # coming from unrelated code.
    try:
        ver_num = h.get_latest_version_num()
    except (OSError, IOError) as e:
        # gid is formatted with %s (as in check_global_version_file) so a
        # string gid cannot make the log call itself raise TypeError.
        logger.info(repr(e) + 'haystack_check: pid:%s, gid:%s, global_version file damage' % (pid, gid))
        return False

    if ver_num is None:
        return False

    attr_path = get_haystack_file_path(pid, gid, ver_num, 'attribute')
    if os.path.exists(attr_path):
        return True

    # Logger.warn is a deprecated alias; warning() is the supported name.
    logger.warning('haystack_check: pid:%s, gid:%s, global_version file damage' % (pid, gid))
    return False
def recovery_global_version_by_attr_file(pid, gid):
    """Reset the haystack's version number from its latest attribute file.

    The version number is the part of the attribute file name between the
    ``'haystack_'`` prefix and the ``'_attribute'`` suffix.

    Returns:
        True when the version number was reset; False when no attribute
        file name was found or the file turned out to be damaged.
    """
    haystack_dir = get_haystack_path(pid, gid)
    store = haystack.Haystack(haystack_dir)

    attr_fn = get_latest_attribute_fn(haystack_dir)
    if attr_fn is None:
        return False

    attr_path = os.path.join(haystack_dir, attr_fn)
    try:
        attr_sha1 = haystack_version.get_file_sha1(attr_path)
    except haystack_version.FileDamaged:
        logger.info('attribute file is damaged %s' % attr_fn)
        return False

    # Strip the fixed prefix and suffix; the version stays a string.
    prefix_len = len('haystack_')
    suffix_len = len('_attribute')
    version = attr_fn[prefix_len:-suffix_len]

    store.reset_version_num(version, attr_sha1)
    return True
def remerge_by_original_files(pid, gid):
    """Create a new haystack version from a group's original files.

    Steps, in order:
      1. empty the 'original_file_names' file of the current version;
      2. recover the group (an incomplete group is tolerated);
      3. build needles from the group's hash files and add the still valid
         needles of the current version, if there is one and it is readable;
      4. force-create a new version from the combined needle list;
      5. delete the hash files and the empty hash folders.

    Args:
        pid: partition id (string; concatenated into the switch URL).
        gid: group id.
    """
    # gid is formatted with %s so that a string gid cannot make a log call
    # raise TypeError; the output is unchanged for integer gids.
    logger.info('haystack_check: pid:%s, gid:%s, begin creating new haystack by original files!' % (pid, gid))

    clear_haystack_original_fns(pid, gid)

    try:
        s2recovery.recover_group(pid, gid)
    except s2recovery.GroupIncomplete as e:
        # Best effort: continue with whatever files are available, but
        # leave a trace instead of swallowing the condition silently.
        logger.info(repr(e) + ' pid:%s, gid:%s group incomplete, continue with available files' % (pid, gid))

    hash_files = get_hash_files(pid, gid)
    needle_list = create_needle_list(hash_files)

    hpath = get_haystack_path(pid, gid)
    h = haystack.Haystack(hpath)

    ver_num = h.get_latest_version_num()
    if ver_num is not None:
        try:
            ver = haystack_version.HaystackVersion(hpath, ver_num)
            needles = ver.get_valid_needle_list(ver.get_damaged_chunks_indexes())
            needle_list.extend(needles)
        except (haystack_version.VersionNotFound,
                haystack_version.FileDamaged) as e:
            # An unreadable current version only means no needles are
            # carried over from it.
            logger.info(repr(e) + 'version %s error!' % ver_num)

    # The return value is taken to be the number of the created version,
    # as the original name `new_num` indicates - confirm against
    # Haystack.create_version.
    new_num = h.create_version(needle_list,
                               CHUNK_NUM,
                               switch_version_port=partition.pid_port(pid),
                               switch_version_url=SWITCH_VERSION_PREFIX + pid + '/' + str(gid),
                               force=True)

    delete_files(hash_files.values())
    delete_empty_hash_folders(pid, gid)

    # Report the version that was just created, not the one it replaced.
    logger.info('haystack_check: pid:%s, gid:%s, finish creating new haystack by original files, version num is %s !' % (pid, gid, new_num))
def check_haystack_files(pid, gid):
    """Check the files of the latest haystack version and repair them.

    The attribute file is checked first; if it is not ok it is recovered
    using the sha1 the haystack holds for that version.  Then every file
    listed by ``get_haystack_files_list`` is checked and recovered
    individually.  As soon as one recovery fails with ``DownloadError``,
    the whole haystack is handed to ``recovery_haystack`` and checking
    stops.
    """
    store = haystack.Haystack(get_haystack_path(pid, gid))
    ver_num = store.get_latest_version_num()

    attr_path = get_haystack_file_path(pid, gid, ver_num, 'attribute')
    # Compared with `== False` on purpose: a None result is not a failure.
    if is_attribute_file_ok(attr_path) == False:
        logger.info('pid: %s gid:%s ver_num:%s attribute file damaged!' % (pid, gid, ver_num))
        try:
            attr_sha1 = store.get_attr_sha1(ver_num)
            recovery_haystack_file(pid, gid, ver_num, 'attribute', attr_sha1)
        except s2recovery_util.DownloadError as err:
            logger.info(repr(err) + 'pid:%s gid:%d ver_num:%s recovery attribute fail!' % (pid, gid, ver_num))
            recovery_haystack(pid, gid)
            return

    listing = get_haystack_files_list(pid, gid)

    full_recovery_needed = False
    for fn_key, sha1 in listing['files_sha1'].items():
        version = listing['version']
        if is_haystack_file_ok(pid, gid, version, fn_key, sha1):
            continue

        logger.info('pid: %s gid:%s ver_num:%s fn:%s file damaged!' % (pid, gid, version, fn_key))
        try:
            recovery_haystack_file(pid, gid, version, fn_key, sha1)
        except s2recovery_util.DownloadError as err:
            logger.info(repr(err) + 'pid:%s gid:%d ver_num:%s recovery fn:%s fail!' % (pid, gid, version, fn_key))
            # One file could not be fetched: stop and recover everything.
            full_recovery_needed = True
            break

    if full_recovery_needed:
        recovery_haystack(pid, gid)