def logError(errorDIR, recording_name, skip_path, index):
    print("\t\tLogging error for {} in directory {}.".format(recording_name, errorDIR))
    try:
        shutil.move(J(RECORDING_PATH[index], recording_name + '.mcpr'),
                    J(errorDIR, recording_name + '.mcpr'))
    except Exception as e:
        print("\t\tERROR", e)

    logFile = open(LOG_FILE[index], 'r', os.O_NONBLOCK).read()
    with open(J(errorDIR, recording_name + '.log'), 'w') as f:
        f.write(logFile)

    try:
        shutil.rmtree(J(RECORDING_PATH[index], recording_name + '.mcpr.tmp'))
    except Exception as e:
        print("\t\tERROR", e)

    with open(skip_path, 'a'):
        try:
            os.utime(skip_path, None)  # => Set skip time to now
        except OSError:
            pass  # File deleted between open() and os.utime() calls
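# The helpers J, E, touch, and remove are used throughout this file but defined
# elsewhere in the pipeline (presumably near the top of the module). The sketch
# below shows what they are assumed to be -- path-join/exists aliases and
# idempotent marker-file utilities -- and is not the canonical definition.
import os

J = os.path.join    # assumed alias, used as J(...) above and below
E = os.path.exists  # assumed alias, used as E(...) above and below


def touch(path):
    # Assumed helper: create/refresh an empty marker file.
    with open(path, 'a'):
        os.utime(path, None)


def remove(path):
    # Assumed helper: remove a file if it exists, ignoring missing files.
    if E(path):
        os.remove(path)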
def construct_render_dirs(blacklist):
    """
    Constructs the render directories omitting elements on a blacklist.
    """
    dirs = [
        RENDER_DIR, ERROR_PARENT_DIR, EOF_EXCEP_DIR, ZEROLEN_DIR,
        NULL_PTR_EXCEP_DIR, ZIP_ERROR_DIR, MISSING_RENDER_OUTPUT,
        X11_ERROR_DIR
    ]
    for dir in dirs:
        if not E(dir):
            os.makedirs(dir)

    # We only care about unrendered directories.
    render_dirs = []
    for filename in tqdm.tqdm(os.listdir(MERGED_DIR)):
        if filename.endswith(".mcpr") and filename not in blacklist:
            recording_name = filename.split(".mcpr")[0]
            render_path = J(RENDER_DIR, recording_name)
            print(render_path)

            if not E(render_path):
                os.makedirs(render_path)

            render_dirs.append((recording_name, render_path))

    return render_dirs
def render_actions(renders: list):
    """
    For every render directory, we render the actions.
    """
    good_renders = []
    bad_renders = []

    for recording_name, render_path in tqdm.tqdm(renders):
        if E(J(render_path, 'network.npy')):
            if E(J(render_path, GOOD_MARKER_NAME)):
                good_renders.append((recording_name, render_path))
            else:
                bad_renders.append((recording_name, render_path))
        else:
            try:
                recording = get_recording_archive(recording_name)

                def extract(fname):
                    return recording.extract(fname, render_path)

                # The action file must be present in the archive.
                assert str(ACTION_FILE) in [str(x) for x in recording.namelist()]

                # Extract the action file if it doesn't already exist.
                action_mcbr = extract(ACTION_FILE)

                # Check that it's non-empty.
                assert not os.stat(action_mcbr).st_size == 0

                # Run the actual action parser and make sure it exits with return code 0.
                p = subprocess.Popen(
                    ["python3", "parse_action.py", os.path.abspath(action_mcbr)],
                    cwd='action_rendering')
                returncode = p.wait()
                assert returncode == 0

                good_renders.append((recording_name, render_path))
            except AssertionError as e:
                _, _, tb = sys.exc_info()
                traceback.print_tb(tb)  # Fixed format
                touch(J(render_path, BAD_MARKER_NAME))
                remove(J(render_path, GOOD_MARKER_NAME))
                bad_renders.append((recording_name, render_path))

    return good_renders, bad_renders
def get_recording_archive(recording_name):
    """
    Gets the zipfile object of a mcpr recording.
    """
    mcpr_path = J(MERGED_DIR, (recording_name + ".mcpr"))
    assert E(mcpr_path)
    return zipfile.ZipFile(mcpr_path)
def add_key_frames(inputPath, segments):
    keyframes = []
    for segment in segments:
        # Convert ticks into video FPS (don't use render ms!)
        keyframes.append(format_seconds(segment[3]))
        keyframes.append(format_seconds(segment[4]))

    split_cmd = [
        'ffmpeg', '-i', J(inputPath, 'recording.mp4'),
        '-c:a', 'copy', '-c:v', 'copy',
        '-force_key_frames', ','.join(keyframes),
        J(inputPath, 'keyframes_recording.mp4')
    ]
    # print('Running: ' + ' '.join(split_cmd))
    try:
        subprocess.check_output(split_cmd, stderr=subprocess.STDOUT)
    except Exception as e:
        print('COMMAND FAILED:', e)
        print(split_cmd)
        FAILED_COMMANDS.append(split_cmd)
def extract_subclip(input_path, start_tick, stop_tick, output_name):
    split_cmd = [
        'ffmpeg', '-ss', format_seconds(start_tick),
        '-i', J(input_path, 'keyframes_recording.mp4'),
        '-t', format_seconds(stop_tick - start_tick),
        '-vcodec', 'copy', '-acodec', 'copy', '-y', output_name
    ]
    # print('Running: ' + ' '.join(split_cmd))
    try:
        subprocess.check_output(split_cmd, stderr=subprocess.STDOUT)
    except Exception as e:
        print('COMMAND FAILED:', e)
        print(split_cmd)
        FAILED_COMMANDS.append(split_cmd)
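# `format_seconds` is referenced by add_key_frames and extract_subclip but not
# defined in this excerpt. A minimal sketch of what it is assumed to do: convert
# a tick count (Minecraft runs at 20 ticks per second, i.e. 50 ms per tick) into
# a seconds string that ffmpeg accepts for -ss / -t / -force_key_frames. The
# exact formatting below is an assumption, not the pipeline's definition.
def format_seconds(ticks):
    # 20 ticks per second; ffmpeg accepts fractional seconds.
    return '{:.3f}'.format(ticks / 20.0)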
def main(): """ The main render script. """ # 1. Load the blacklist. blacklist = set(np.loadtxt(BLACKLIST_PATH, dtype=np.str).tolist()) print("Constructing render directories.") renders = construct_render_dirs(blacklist) print("Validating metadata from files:") valid_renders, invalid_renders = render_metadata(renders) print(len(valid_renders)) # print("Rendering actions: ") # valid_renders, invalid_renders = render_actions(valid_renders) print("... found {} valid recordings and {} invalid recordings" " out of {} total files".format(len(valid_renders), len(invalid_renders), len(os.listdir(MERGED_DIR)))) unfinished_renders = [ v for v in valid_renders if not len(glob.glob(J(v[1], '*.mp4'))) ] print("... found {} unfinished renders out of {} valid renders".format( len(unfinished_renders), len(valid_renders))) print("Rendering videos: ") clean_render_dirs() # Render videos in multiprocessing queue multiprocessing.freeze_support() with multiprocessing.Pool(NUM_MINECRAFT_DIR, initializer=tqdm.tqdm.set_lock, initargs=(multiprocessing.RLock(), )) as pool: manager = ThreadManager(multiprocessing.Manager(), NUM_MINECRAFT_DIR, 0, 1) func = functools.partial(_render_videos, manager) num_rendered = list( tqdm.tqdm(pool.imap_unordered(func, unfinished_renders), total=len(unfinished_renders), desc='Files', miniters=1, position=0, maxinterval=1, smoothing=0)) print('Rendered {} new files!'.format(sum(num_rendered)))
def get_metadata(renders: list) -> list:
    """
    Unpacks the metadata of a recording and checks its validity.
    """
    good_renders = []
    bad_renders = []

    for recording_name, render_path in tqdm.tqdm(renders):
        if E(render_path):
            # Check if metadata has already been extracted.
            # If it has been computed, see if it is valid or not.
            if E(J(render_path, GOOD_MARKER_NAME)):
                good_renders.append((recording_name, render_path))
            else:
                bad_renders.append((recording_name, render_path))
                black_list.add(recording_name)

    return good_renders, bad_renders
def construct_data_dirs(black_list):
    """
    Constructs the render directories omitting elements on a blacklist.
    """
    print(DATA_DIR)
    if not E(DATA_DIR):
        os.makedirs(DATA_DIR)

    data_dirs = []
    for exp_folder in tqdm.tqdm(next(os.walk(DATA_DIR))[1],
                                desc='Directories',
                                position=0):
        for experiment_dir in tqdm.tqdm(
                next(os.walk(J(DATA_DIR, exp_folder)))[1],
                desc='Experiments',
                position=1):
            if not exp_folder.startswith('MineRL') \
                    and experiment_dir.split('g1_')[-1] not in black_list:
                data_dirs.append((experiment_dir, exp_folder))
    return data_dirs
def construct_data_dirs(blacklist):
    """
    Constructs the render directories omitting elements on a blacklist.
    """
    if not E(DATA_DIR):
        os.makedirs(DATA_DIR)
    if not E(RENDER_DIR):
        os.makedirs(RENDER_DIR)

    render_dirs = []
    for filename in tqdm.tqdm(next(os.walk(RENDER_DIR))[1]):
        if filename not in blacklist:
            render_path = J(RENDER_DIR, filename)

            if not E(render_path):
                continue

            render_dirs.append((filename, render_path))
    return render_dirs
def construct_data_dirs(black_list):
    """
    Constructs the render directories omitting elements on a blacklist.
    """
    print(DATA_DIR)
    if not E(DATA_DIR):
        os.makedirs(DATA_DIR)

    data_dirs = []
    for exp_folder in tqdm.tqdm(next(os.walk(DATA_DIR))[1],
                                desc='Directories',
                                position=0):
        for experiment_dir in tqdm.tqdm(
                next(os.walk(J(DATA_DIR, exp_folder)))[1],
                desc='Experiments',
                position=1):
            exp_name = "{}/{}".format(exp_folder, experiment_dir)
            if exp_folder.startswith('MineRL'):
                print("{} was skipped because it begins with 'MineRL'".format(exp_name))
            elif not experiment_dir.startswith('g1_'):
                # Skip silently unless the directory doesn't begin with v3_.
                # v3_ files are results of this script. The directory could end up in
                # this logic block if the Gym ID of the environment doesn't begin with
                # MineRL. That can happen for environments that are not officially
                # MineRL envs, e.g. when someone is building a MineRL extension.
                if not experiment_dir.startswith('v3_'):
                    print("{} was skipped because {} doesn't begin with 'v3_'".format(
                        exp_name, experiment_dir))
            elif experiment_dir.split('g1_')[-1] in black_list:
                print("{} was skipped because it is cached on the dynamically "
                      "generated blacklist".format(exp_name))
            else:
                data_dirs.append((experiment_dir, exp_folder))
    return data_dirs
def package(out_dir=DATA_DIR):
    # Verify version
    if DATA_DIR is None:
        raise RuntimeError('MINERL_DATA_ROOT is not set!')
    with open(os.path.join(DATA_DIR, minerl.data.VERSION_FILE_NAME)) as version_file:
        version_file_num = int(version_file.readline())
        if minerl.data.DATA_VERSION != version_file_num:
            raise RuntimeError(
                'Data version is out of date! MineRL data version is {} but VERSION file is {}'
                .format(minerl.data.DATA_VERSION, version_file_num))

    logging.info("Writing tar files to {}".format(out_dir))
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Collect experiment folders
    exp_folders = [
        f for f in os.listdir(DATA_DIR) if f.startswith('MineRL') and '.' not in f
    ]

    # Generate tar archives
    os.chdir(DATA_DIR)
    with tarfile.open(os.path.join(out_dir, 'data_texture_0_low_res.tar'), "w") as archive:
        logging.info('Generating archive {}'.format('data_texture_0_low_res.tar'))
        archive.add('VERSION')
        for folder in exp_folders:
            archive.add(folder)

    with tarfile.open(os.path.join(out_dir, 'data_texture_0_low_res_minimal.tar'),
                      "w") as archive:
        logging.info('Generating archive {}'.format('data_texture_0_low_res_minimal.tar'))
        archive.add('VERSION')
        random.seed(minerl.data.DATA_VERSION)
        for folder in exp_folders:
            for _ in range(5):
                archive.add(
                    J(folder, random.choice(os.listdir(J(DATA_DIR, folder)))))

    # Generate individual tar files
    for folder in exp_folders:
        with tarfile.open(J(out_dir, folder + '.tar'), "w") as archive:
            logging.info('Generating archive {}.tar'.format(folder))
            archive.add('VERSION')
            archive.add(folder)

    # Generate hash files
    # logging.info('Generating hashes for all files')
    # subprocess.run(['md5sum', '*.tar.gz', '>', J(out_dir, 'MD5SUMS')], cwd=out_dir)
    # subprocess.run(['sha1sum', 'MineRL*.tar.gz', '|', 'SHA1SUMS '])
    archives = [a for a in os.listdir(out_dir) if a.endswith('.tar')]
    with open(J(out_dir, 'MD5SUMS'), 'w') as md5_file, \
            open(J(out_dir, 'SHA1SUMS'), 'w') as sha1_file, \
            open(J(out_dir, 'SHA256SUMS'), 'w') as sha256_file:
        for archive in archives:
            logging.info('Generating hashes for {}'.format(archive))
            archive_dir = os.path.join(out_dir, archive)
            md5_file.write('{} {}\n'.format(
                hashlib.md5(open(archive_dir, 'rb').read()).hexdigest(), archive))
            sha1_file.write('{} {}\n'.format(
                hashlib.sha1(open(archive_dir, 'rb').read()).hexdigest(), archive))
            sha256_file.write('{} {}\n'.format(
                hashlib.sha256(open(archive_dir, 'rb').read()).hexdigest(), archive))
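# Hedged usage sketch (not part of the pipeline): package() writes the hash files
# with "<hexdigest> <archive name>" per line, so a downloaded archive could be
# checked against SHA256SUMS roughly as below. Function name and paths are
# illustrative only.
import hashlib
import os


def verify_archive(archive_path, sums_path='SHA256SUMS'):
    """Return True if archive_path matches its entry in a SHA256SUMS file."""
    expected = None
    with open(sums_path) as sums:
        for line in sums:
            digest, name = line.split()
            if name == os.path.basename(archive_path):
                expected = digest
                break
    if expected is None:
        return False
    with open(archive_path, 'rb') as f:
        return hashlib.sha256(f.read()).hexdigest() == expected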
def render_data(output_root, recording_dir, experiment_folder, black_list,
                extra_env_specs=(), lineNum=None):
    # Script to pair actions with the video recording.
    # All times are in ms and we assume an actions list, a timestamp file,
    # and a desynchronized mp4 video.

    # File names
    segment_str = recording_dir.split('g1_')[-1]
    if segment_str in black_list:
        return 0

    # Generate Numpy
    source_folder = J(DATA_DIR, experiment_folder, recording_dir)
    recording_source = J(source_folder, 'recording.mp4')
    universal_source = J(source_folder, 'univ.json')
    metadata_source = J(source_folder, 'metadata.json')

    # Gather all renderable environments for this experiment directory
    rendered_envs = 0
    all_env_specs = (*envs.ENVS, *extra_env_specs)
    filtered_environments = [
        env_spec for env_spec in all_env_specs
        if env_spec.is_from_folder(experiment_folder)
    ]

    # Don't render if files are missing
    if not E(source_folder) or not E(recording_source) \
            or not E(universal_source) or not E(metadata_source):
        print("Blacklisting {}/{}".format(experiment_folder, recording_dir))
        black_list.add(segment_str)
        return 0

    # Process universal json
    with open(universal_source, 'r') as json_file:
        universal = json.load(json_file)
        universal = remove_initial_frames(universal)

    for environment in filtered_environments:
        dest_folder = J(output_root, environment.name,
                        'v{}_{}'.format(PUBLISHER_VERSION, segment_str))
        recording_dest = J(dest_folder, 'recording.mp4')
        rendered_dest = J(dest_folder, 'rendered.npz')
        metadata_dest = J(dest_folder, 'metadata.json')

        # TODO remove to incrementally render files - during testing re-render each time
        if E(rendered_dest):
            os.remove(J(rendered_dest))

        # Don't render again; ensure the source exists.
        if E(rendered_dest):
            continue

        # Load relevant handlers
        info_handlers = [
            obs for obs in environment.observables
            if not isinstance(obs, handlers.POVObservation)
        ]
        reward_handlers = [
            r for r in environment.mission_handlers
            if isinstance(r, handlers.RewardHandler)
        ]
        # done_handlers = [hdl for hdl in task.create_mission_handlers()
        #                  if isinstance(hdl, handlers.QuitHandler)]
        action_handlers = environment.actionables

        all_handlers = [
            hdl for sublist in [info_handlers, reward_handlers, action_handlers]
            for hdl in sublist
        ]
        for hdl in all_handlers:
            try:
                if "reset" in dir(hdl):
                    hdl.reset()
            except (NotImplementedError, AttributeError):
                continue

        try:
            published = dict(
                reward=np.array([
                    sum([hdl.from_universal(universal[tick]) for hdl in reward_handlers])
                    for tick in universal
                ], dtype=np.float32)[1:])

            for tick in universal:
                tick_data = {}
                for _prefix, hdlrs in [("observation", info_handlers),
                                       ("action", action_handlers)]:
                    if _prefix not in tick_data:
                        tick_data[_prefix] = OrderedDict()

                    for handler in hdlrs:
                        # Apply the handler's from_universal to universal[tick]
                        val = handler.from_universal(universal[tick])
                        assert val in handler.space, \
                            "{} is not in {} for handler {}".format(
                                val, handler.space, handler.to_string())
                        tick_data[_prefix][handler.to_string()] = val

                    # Perhaps we can wrap here
                    if isinstance(environment, EnvWrapper):
                        if _prefix == "observation":
                            tick_data[_prefix]['pov'] = \
                                environment.observation_space['pov'].no_op()
                            tick_data[_prefix] = environment.wrap_observation(
                                tick_data[_prefix])
                            del tick_data[_prefix]['pov']
                        elif _prefix == "action":
                            tick_data[_prefix] = environment.wrap_action(
                                tick_data[_prefix])

                tick_data = flatten(tick_data, sep='$')
                for k, v in tick_data.items():
                    try:
                        published[k].append(v)
                    except KeyError:
                        published[k] = [v]

            # Adjust the action one forward (recall that the action packet is one off.)
            for k in published:
                if k.startswith("action"):
                    published[k] = published[k][1:]

        except NotImplementedError as err:
            print('Exception:', str(err), 'found with environment:', environment.name)
            raise err
        except KeyError as err:
            print("Key error in file - check from_universal for handlers")
            print(err)
            continue
        except AssertionError as e:
            # Warn the user if some of the observations or actions don't fall in the
            # gym.space (the space-checking assertion error from above was raised).
            print("Warning!" + str(e))
            import traceback
            traceback.print_exc()
            continue
        except Exception as e:
            print("caught exception:", str(e))
            for hdl in all_handlers:
                try:
                    for tick in universal:
                        hdl.from_universal(universal[tick])
                except Exception as f:
                    print("Exception <", str(f), "> for command handler:", hdl)
                    continue
            raise e

        if 'Caves' in environment.name:
            # Without this exception, Caves is auto-blacklisted because reward == 0.0.
            pass
        # TODO(shwang): Move the following smoke checks to the appropriate EnvSpecs.
        # At that point, we can get rid of this whole if-else tree.
        elif 'Survival' not in environment.name and not isinstance(environment, Obfuscated):
            # TODO these could be handlers instead!
            if sum(published['reward']) == 1024.0 and 'Obtain' in environment.name \
                    or sum(published['reward']) < 64 and ('Obtain' not in environment.name) \
                    or sum(published['reward']) == 0.0 \
                    or sum(published['action$forward']) == 0 \
                    or sum(published['action$attack']) == 0 and 'Navigate' not in environment.name:
                black_list.add(segment_str)
                print('Hey we should have blacklisted {} tyvm'.format(segment_str))
                return 0

        # TODO(shwang): For now, `bool(reason)` is always False. But later when we've
        # ported the if-else logic immediately above, it will describe why the
        # environment was blacklisted.
        reason = environment.auto_blacklist(published)
        if reason:
            print("{} is auto-blacklisting {}/{} because {}.".format(
                environment.__class__.__name__,
                experiment_folder,
                recording_dir,
                reason,
            ))
            black_list.add(segment_str)

        # Set up destination root
        if not E(dest_folder):
            try:
                os.makedirs(dest_folder, exist_ok=True)
            except OSError as exc:
                print('Could not make folder: ', dest_folder)
                raise exc

        # Render metadata
        try:
            # Copy video if necessary
            if not E(recording_dest):
                # Use a hardlink to save disk space. Compatible with both Unix and Windows.
                os.link(src=recording_source, dst=recording_dest)

            np.savez_compressed(rendered_dest, **published)

            with open(metadata_source, 'r') as meta_file:
                source = json.load(meta_file)
                metadata_out = {}
                metadata_out['success'] = str(
                    environment.determine_success_from_rewards(published['reward']))
                metadata_out['duration_ms'] = len(
                    published['reward']) * 50  # source['end_time'] - source['start_time']
                metadata_out['duration_steps'] = len(published['reward'])
                metadata_out['total_reward'] = sum(published['reward'])
                metadata_out['stream_name'] = 'v{}{}'.format(
                    PUBLISHER_VERSION, recording_dir[len('g1'):])
                metadata_out['true_video_frame_count'] = calculate_frame_count(recording_dest)
            with open(metadata_dest, 'w') as meta_file_out:
                json.dump(metadata_out, meta_file_out)

            rendered_envs += 1
        except (KeyError, ValueError) as e:
            print(e)
            shutil.rmtree(dest_folder, ignore_errors=True)
            continue

    return rendered_envs
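# Hedged usage sketch: render_data() above writes each episode with
# np.savez_compressed(rendered_dest, **published), where `published` holds a
# float32 'reward' series plus observation/action series flattened with '$' as
# the separator (e.g. 'action$forward'). Loading one such file back could look
# like this; the function name and path are illustrative only.
import numpy as np


def load_rendered_episode(rendered_npz_path):
    with np.load(rendered_npz_path) as data:
        episode = {key: data[key] for key in data.files}
    # episode['reward'] has one entry per step; action keys such as
    # 'action$forward' were shifted forward by one step during rendering.
    return episode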
def gen_sarsa_pairs(outputPath, inputPath, recordingName, lineNum=None, debug=False):
    # Script to pair actions with the video recording.
    # All times are in ms and we assume an actions list, a timestamp file,
    # and a desynchronized mp4 video.

    # Decide if absolute or relative (old format).
    # Disable data generation for the old format.
    if E(J(inputPath, 'metaData.json')):
        metadata = json.load(open(J(inputPath, 'metaData.json')))
        if 'generator' in metadata:
            version = metadata['generator'].split('-')[-2]
            if int(version) < 103:
                return 0
    else:
        tqdm.tqdm.write('No metadata in ' + inputPath)
        return 0

    # Generate recording segments:
    # sorted pairs of (start, stop, experimentName) timestamps (in ms).
    segments = []
    numNewSegments = 0

    markers = OrderedDict()
    streamMetadata = json.load(open(J(inputPath, 'stream_meta_data.json')))

    if 'markers' in streamMetadata:
        markers_sp = streamMetadata['markers']
        for marker in markers_sp:
            markers[marker['realTimestamp']] = marker
    else:
        if debug:
            print('No markers found in stream_meta_data.json! Was it overwritten on re-render?')
        return 0

    startTime = None
    startTick = None
    startMarker = None

    # If we have to load univ.json, make sure we don't load it again.
    univ_json = None

    for key, marker in sorted(markers.items()):
        expName = ""
        # Get the experiment name (it's a malformed json so we have to look it up by hand).
        if 'value' in marker and 'metadata' in marker['value'] \
                and 'expMetadata' in marker['value']['metadata']:
            meta = marker['value']['metadata']
            malformedStr = meta['expMetadata']
            jsonThing = json.loads(
                malformedStr[malformedStr.find('experimentMetadata') + 19:-1])
            if 'experiment_name' in jsonThing:
                expName = jsonThing['experiment_name']

                if expName == 'o_meat' and 'tick' in meta \
                        and 'stopRecording' in meta and meta['stopRecording']:
                    # Look backwards for meat at most 32 ticks in the past.
                    # Lets players who were assigned obtain cooked X become
                    # winners for obtain cooked Y.
                    tick = meta['tick']
                    if univ_json is None:
                        univ_json = json.loads(open(J(inputPath, 'univ.json')).read())
                    for i in range(32):
                        if str(tick - i) in univ_json and 'slots' in univ_json[str(tick - i)]:
                            slot = [
                                elem.values()
                                for elem in univ_json[str(tick - i)]['slots']['gui']['slots']
                                if 'item.porkchopCooked' in elem.values()
                                or 'item.beefCooked' in elem.values()
                                or 'item.muttonCooked' in elem.values()
                            ]
                            if len(slot) == 0:
                                continue
                            if 'item.porkchopCooked' in slot[0]:
                                expName += '/cooked_pork'
                                break
                            if 'item.beefCooked' in slot[0]:
                                expName += '/cooked_beef'
                                break
                            if 'item.muttonCooked' in slot[0]:
                                expName += '/cooked_mutton'
                                break
                        else:
                            break

                if expName == 'o_bed' and 'tick' in meta \
                        and 'stopRecording' in meta and meta['stopRecording']:
                    # Look backwards for a bed at most 32 ticks in the past.
                    # Lets players who were assigned obtain bed X become
                    # winners for obtain bed Y.
                    tick = meta['tick']
                    if univ_json is None:
                        univ_json = json.loads(open(J(inputPath, 'univ.json')).read())
                    for i in range(32):
                        if str(tick - i) in univ_json and 'slots' in univ_json[str(tick - i)]:
                            slot = [
                                elem.values()
                                for elem in univ_json[str(tick - i)]['slots']['gui']['slots']
                                if 'item.bed.black' in elem.values()
                                or 'item.bed.white' in elem.values()
                                or 'item.bed.yellow' in elem.values()
                            ]
                            if len(slot) == 0:
                                continue
                            if 'item.bed.black' in slot[0]:
                                expName += '/black'
                                break
                            if 'item.bed.yellow' in slot[0]:
                                expName += '/yellow'
                                break
                            if 'item.bed.white' in slot[0]:
                                expName += '/white'
                                break
                        else:
                            break

                def treechop_finished(tick):
                    gui = tick['slots']['gui']
                    num_logs = 0
                    if 'ContainerPlayer' in gui['type']:
                        for slot in gui['slots']:
                            # Accounts for log and log2
                            if slot and 'log' in slot['name']:
                                num_logs += slot['count']
                    return num_logs >= 64

                def treechop_adjust(univ, t):
                    return

                def o_iron_finished(tick):
                    try:
                        changes = tick['diff']['changes']
                        for change in changes:
                            if change['item'] == 'minecraft:iron_pickaxe' \
                                    and change['quantity_change'] > 0:
                                return True
                    except KeyError:
                        pass
                    return False

                def o_iron_adjust(univ, t):
                    try:
                        univ[t]['diff']['changes'] = [{
                            'item': 'minecraft:iron_pickaxe',
                            'variant': 0,
                            'quantity_change': 1
                        }]
                    except KeyError:
                        pass

                def o_dia_finished(tick):
                    try:
                        changes = tick['diff']['changes']
                        for change in changes:
                            if change['item'] == 'minecraft:diamond' \
                                    and change['quantity_change'] > 0:
                                return True
                    except KeyError:
                        pass
                    return False

                def o_dia_adjust(univ, t):
                    # print(univ[t])
                    try:
                        univ[t]['diff']['changes'] = [{
                            'item': 'minecraft:diamond',
                            'variant': 0,
                            'quantity_change': 1
                        }]
                        # print(univ[t])
                    except KeyError:
                        pass

                def nav_finished(tick):
                    try:
                        for block in tick['touched_blocks']:
                            if 'minecraft:diamond_block' in block['name']:
                                return True
                    except KeyError:
                        pass
                    return False

                def nav_adjust(univ, t):
                    try:
                        univ[t]['navigateHelper'] = 'minecraft:diamond_block'
                    except KeyError:
                        pass

                finish_conditions = {
                    'survivaltreechop': (treechop_finished, treechop_adjust),
                    'o_iron': (o_iron_finished, o_iron_adjust),
                    'o_dia': (o_dia_finished, o_dia_adjust),
                    'navigate': (nav_finished, nav_adjust),
                    'navigateextreme': (nav_finished, nav_adjust)
                }

                for finish_expName in finish_conditions:
                    condition, adjust = finish_conditions[finish_expName]
                    if expName == finish_expName and 'tick' in meta \
                            and 'stopRecording' in meta and meta['stopRecording'] \
                            and startTick is not None:
                        if univ_json is None:
                            univ_json = json.loads(open(J(inputPath, 'univ.json')).read())

                        cond_satisfied = []
                        metadata = parse_metadata(startMarker, marker)
                        # print("Here is the metadata:")
                        # print(metadata)
                        # print("there it was")

                        # TODO these should be quit handlers that return success True/False
                        for i in range(min(400, meta['tick'] - startTick)):
                            considered_tick = (meta['tick'] - i)
                            try:
                                if condition(univ_json[str(considered_tick)]):
                                    cond_satisfied.append(considered_tick)
                            except KeyError:
                                pass

                        cond_satisfied = sorted(cond_satisfied)
                        if cond_satisfied:
                            meta['tick'] = cond_satisfied[0]
                        else:
                            # Add change if winner
                            try:
                                if len(metadata['server_metadata']['winners']) > 0:
                                    adjust(univ_json, str(meta['tick']))
                            except (KeyError, TypeError) as e:
                                traceback.print_exc()
            else:
                continue

            if 'startRecording' in meta and meta['startRecording'] and 'tick' in meta:
                # If we encounter a start marker after a start marker, there is an
                # error and we should throw away the previous start marker and
                # start fresh.
                startTime = key
                startTick = meta['tick']
                startMarker = marker

            if 'stopRecording' in meta and meta['stopRecording'] and startTime is not None:
                segments.append((startMarker, marker, expName, startTick, meta['tick']))
                # segments.append((startTime, key, expName, startTick, meta['tick'],
                #                  startMarker, marker))
                startTime = None
                startTick = None

    # Layout of segments (new):
    #   0.             1.            2.                3.           4.
    #   Start Marker : Stop Marker : Experiment Name : Start Tick : Stop Tick
    #
    # (hack patch)
    #   0.          1.         2.        3.          4.         5.            6.
    #   startTime : stopTime : expName : startTick : stopTick : startMarker : stopMarker

    if not E(J(inputPath, "recording.mp4")):
        if debug:
            tqdm.tqdm.write('No recording found in ' + inputPath)
        return 0

    if len(markers) == 0:
        if debug:
            tqdm.tqdm.write('No valid markers found')
        return 0

    if univ_json is None:
        univ_json = json.loads(open(J(inputPath, 'univ.json')).read())
    if 'ticks' not in univ_json:
        if debug:
            tqdm.tqdm.write('No ticks file in ' + inputPath)
        return 0
    ticks = univ_json['ticks']

    videoOffset_ms = streamMetadata['start_timestamp']
    videoOffset_ticks = get_tick(ticks, videoOffset_ms)

    segments = [
        (segment[0], segment[1], segment[2],
         segment[3] - videoOffset_ticks,
         segment[4] - videoOffset_ticks) for segment in segments
    ]
    segments = [
        segment for segment in segments
        if segment[4] - segment[3] > EXP_MIN_LEN_TICKS and segment[3] > 0
    ]

    pbar = tqdm.tqdm(total=len(segments), desc='Segments', leave=False, position=lineNum)

    if not segments or len(segments) == 0:
        if debug:
            tqdm.tqdm.write('No segments in ' + inputPath)
        return 0

    try:
        if E(J(inputPath, 'keyframes_recording.mp4')):
            os.remove(J(inputPath, 'keyframes_recording.mp4'))
        add_key_frames(inputPath, segments)
    except subprocess.CalledProcessError as exception:
        open('errors.txt', 'a+').write(
            "Error splitting {}:\033[0;31;47m {} \033[0m 0;31;47m".format(
                recordingName, exception) + inputPath + '\n')
        return 0

    for pair in segments:
        time.sleep(0.05)
        startMarker = pair[0]
        stopMarker = pair[1]
        experimentName = pair[2]
        startTick = pair[3]
        stopTick = pair[4]

        # BAH introduce versioning
        experiment_id = 'g{}_{}'.format(
            GENERATE_VERSION, recordingName[len('player_stream_'):]) \
            + "_" + str(int(startTick)) + '-' + str(int(stopTick))

        output_name = J(outputPath, experimentName, experiment_id, 'recording.mp4')
        univ_output_name = J(outputPath, experimentName, experiment_id, 'univ.json')
        meta_output_name = J(outputPath, experimentName, experiment_id, 'metadata.json')

        output_dir = os.path.dirname(output_name)
        if not E(output_dir):
            os.makedirs(output_dir)

        if not (E(output_name) and E(univ_output_name) and E(meta_output_name)):
            try:
                # Only load the universal json if needed
                if univ_json is None:
                    univ_json = json.loads(open(J(inputPath, 'univ.json')).read())

                # Remove potentially stale elements
                if E(output_name):
                    os.remove(output_name)
                if E(univ_output_name):
                    os.remove(univ_output_name)
                if E(meta_output_name):
                    os.remove(meta_output_name)

                json_to_write = {}
                for idx in range(startTick, stopTick + 1):
                    json_to_write[str(idx - startTick)] = univ_json[str(idx)]

                # Split universal.json
                json.dump(json_to_write, open(univ_output_name, 'w'))

                # Split metadata.json
                json.dump(metadata, open(meta_output_name, 'w'))

                # Split video (without re-encoding)
                extract_subclip(inputPath, startTick, stopTick, output_name)
                numNewSegments += 1
                pbar.update(1)
            except KeyboardInterrupt:
                return numNewSegments
            except KeyError:
                open('errors.txt', 'a+').write(
                    "Key Error " + str(idx) + " not found in universal json: "
                    + inputPath + '\n')
                continue
            except Exception as e:
                open('errors.txt', 'a+').write(
                    "Exception in segment rendering" + str(e) + str(type(e))
                    + inputPath + '\n')
                continue
    return numNewSegments
def main(parallel: bool = True, n_workers: int = NUM_MINECRAFT_DIR):
    """
    The main render script.

    Args:
        parallel: If True, then use true multiprocessing to parallelize jobs.
            Otherwise, use multithreading, which allows breakpoints and other
            debugging tools, but is slower.
    """
    if not E(MERGED_DIR):
        print("{} does not exist. Run merge.py first.".format(MERGED_DIR))

    # 1. Load the blacklist.
    blacklist_path = Path(BLACKLIST_PATH)
    if not blacklist_path.exists():
        blacklist_path.parent.mkdir(parents=True, exist_ok=True)
        blacklist_path.touch()
    blacklist = set(np.loadtxt(BLACKLIST_PATH, dtype=str).tolist())

    print("Constructing render directories.")
    renders = construct_render_dirs(blacklist)

    print("Validating metadata from files:")
    valid_renders, invalid_renders = render_metadata(renders)
    print(len(valid_renders))
    # print("Rendering actions: ")
    # valid_renders, invalid_renders = render_actions(valid_renders)
    print("... found {} valid recordings and {} invalid recordings"
          " out of {} total files".format(
              len(valid_renders), len(invalid_renders),
              len(os.listdir(MERGED_DIR))))

    unfinished_renders = [
        v for v in valid_renders if not len(glob.glob(J(v[1], '*.mp4')))
    ]
    print("... found {} unfinished renders out of {} valid renders".format(
        len(unfinished_renders), len(valid_renders)))

    print("Rendering videos: ")
    clean_render_dirs()

    # _render_videos(manager, unfinished_renders[0])
    if parallel:
        import multiprocessing
        multiprocessing.freeze_support()
    else:
        import multiprocessing.dummy as multiprocessing

    # Render videos in a multiprocessing queue.
    with multiprocessing.Pool(n_workers,
                              initializer=tqdm.tqdm.set_lock,
                              initargs=(multiprocessing.RLock(),)) as pool:
        manager = ThreadManager(multiprocessing.Manager(), n_workers, 0, 1)
        func = functools.partial(_render_videos, manager)
        num_rendered = list(
            tqdm.tqdm(pool.imap_unordered(func, unfinished_renders),
                      total=len(unfinished_renders),
                      desc='Files',
                      miniters=1,
                      position=0,
                      maxinterval=1,
                      smoothing=0))

    print('Rendered {} new files!'.format(sum(num_rendered)))
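# Hedged entry-point sketch: the excerpt does not show how main() is invoked.
# If this script is run directly, a conventional guard like the following would
# call it with its defaults; this is an assumption, not part of the pipeline.
if __name__ == '__main__':
    main()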
def render_metadata(renders: list):
    """
    Unpacks the metadata of a recording and checks its validity.
    """
    good_renders = []
    bad_renders = []

    for recording_name, render_path in tqdm.tqdm(renders):
        if E(render_path):
            # Check if metadata has already been extracted.
            # if (E(J(render_path, GOOD_MARKER_NAME)) or
            #         E(J(render_path, BAD_MARKER_NAME))):
            #     # If it has been computed, see if it is valid or not.
            #     if E(J(render_path, GOOD_MARKER_NAME)):
            #         good_renders.append((recording_name, render_path))
            #     else:
            #         bad_renders.append((recording_name, render_path))
            # else:
            # BAH check metadata each time
            if True:
                try:
                    recording = get_recording_archive(recording_name)

                    def extract(fname):
                        return recording.extract(fname, render_path)

                    # If everything is good, extract the metadata.
                    for mfile in METADATA_FILES:
                        assert str(mfile) in [str(x) for x in recording.namelist()]
                        if not E(J(render_path, mfile)):
                            extract(mfile)

                    # Check that metaData.json is good.
                    with open(J(render_path, 'metaData.json'), 'r') as f:
                        # print(render_path)
                        jbos = json.load(f)
                        # assert (jbos["duration"] > 60000 or jbos["duration"] == 0)
                        assert (jbos["duration"] > 300000)

                    # Go through and check if we got the experiments.
                    try:
                        with open(J(render_path, 'markers.json'), 'r') as f:
                            markers = json.load(f)
                            has_any_exps = False

                            for marker in markers:
                                exp_metadata = marker['value']['metadata']['expMetadata']

                                for exp in RENDER_ONLY_EXPERIMENTS:
                                    has_any_exps = (exp in exp_metadata) or has_any_exps

                            assert has_any_exps
                    except (KeyError, FileNotFoundError):
                        raise AssertionError("Couldn't open metadata json.")

                    # Check that stream_meta_data is good.
                    with open(J(render_path, 'stream_meta_data.json'), 'r') as f:
                        jbos = json.load(f)
                        assert jbos["has_EOF"]
                        assert not jbos["miss_seq_num"]

                    touch(J(render_path, GOOD_MARKER_NAME))
                    remove(J(render_path, BAD_MARKER_NAME))
                    good_renders.append((recording_name, render_path))
                except (json.decoder.JSONDecodeError, AssertionError):
                    _, _, tb = sys.exc_info()
                    traceback.print_tb(tb)  # Fixed format
                    # Mark that this is a bad file.
                    touch(J(render_path, BAD_MARKER_NAME))
                    remove(J(render_path, GOOD_MARKER_NAME))
                    bad_renders.append((recording_name, render_path))

    return good_renders, bad_renders
def clean_render_dirs():
    paths_to_clear = [RENDERED_VIDEO_PATH, RECORDING_PATH, RENDERED_LOG_PATH]
    # Remove every file under each per-worker directory in the paths above.
    for p in paths_to_clear:
        for x in p:
            for f in glob.glob(J(x, '*')):
                remove(f)
def render_videos(render: tuple, index=0, debug=False):
    """
    For every render directory, we render the videos.
    This works by:
        1) Copying the file to the minecraft directory
        2) Waiting for user input: the user renders the video using the replay mod
           and hits enter once the video is rendered
        3) Copying the produced mp4 to the rendered directory
    """
    # Restart Minecraft after so many renders
    maxConsecutiveRenders = 1
    numSuccessfulRenders = 0

    # Remove any finished-file flags to prevent copying unfinished renders
    try:
        os.remove(FINISHED_FILE[index])
    except FileNotFoundError:
        pass

    # Clear the recording directory to protect against crash messages
    for messyFile in glob.glob(J(RECORDING_PATH[index], '*')):
        try:
            os.remove(messyFile)
        except IsADirectoryError:
            shutil.rmtree(messyFile)

    p = None
    try:
        recording_name, render_path = render

        # Get mcpr file from merged
        tqdm.tqdm.write("Rendering {} ...".format(recording_name))

        # Skip if the folder has a recording already
        # (* means all; if a specific format is needed, use e.g. *.csv)
        list_of_files = glob.glob(J(render_path, '*.mp4'))
        if len(list_of_files):
            tqdm.tqdm.write("\tSkipping: replay folder contains {}".format(list_of_files[0]))
            return 0

        # Skip if the file has been skipped already
        skip_path = J(render_path, SKIPPED_RENDER_FLAG)
        if E(skip_path):
            tqdm.tqdm.write("\tSkipping: file was previously skipped")
            return 0

        mcpr_path = J(MERGED_DIR, (recording_name + ".mcpr"))

        copyfile(mcpr_path, J(RECORDING_PATH[index], (recording_name + ".mcpr")))
        copy_time = os.path.getmtime(J(RECORDING_PATH[index], (recording_name + ".mcpr")))

        if not E(LOG_FILE[index]):
            os.makedirs(os.path.dirname(LOG_FILE[index]), exist_ok=True)
            Path(LOG_FILE[index]).touch()

        logFile = open(LOG_FILE[index], 'r', os.O_NONBLOCK)
        lineCounter = 0  # RAH So we can print the line number of the error

        # Render the file
        p = launchMC(index)

        # Wait for completion (it creates a finished.txt file)
        video_path = None
        notFound = True
        errorDir = None
        while notFound:
            if os.path.exists(FINISHED_FILE[index]) or p.poll() is not None:
                if os.path.exists(FINISHED_FILE[index]):
                    os.remove(FINISHED_FILE[index])
                    notFound = False
                    numSuccessfulRenders += 1
                else:
                    notFound = True
                    try:
                        if debug:
                            print("Waiting for Minecraft to close")
                        p.wait(400)
                        if debug:
                            print("Minecraft closed")
                    except TimeoutError:
                        tqdm.tqdm.write("Timeout")
                        p.kill()
                        # killMC(p)
                    except:
                        tqdm.tqdm.write("Error stopping")
                    # p = launchMC(index)
                # if numSuccessfulRenders > maxConsecutiveRenders:
                #     killMC(p)
                #     numSuccessfulRenders = 0
                #     # time.sleep(5)
                #     p = launchMC()
                break
            else:
                logLine = logFile.readline()
                if len(logLine) > 0:
                    lineCounter += 1
                    if re.search(r"EOFException:", logLine):
                        if debug:
                            print("\tfound java.io.EOFException")
                        errorDir = EOF_EXCEP_DIR
                    elif re.search(r"Adding time keyframe at \d+ time -\d+", logLine):
                        if debug:
                            print("\tfound 0 length file")
                        errorDir = ZEROLEN_DIR
                    elif re.search(r"NullPointerException", logLine):
                        if not re.search(r'exceptionCaught', logLine):
                            if debug:
                                print("\tNullPointerException")
                            errorDir = NULL_PTR_EXCEP_DIR
                    elif re.search(r"zip error", logLine) or re.search(r"zip file close", logLine):
                        if debug:
                            print('ZIP file error')
                        errorDir = ZIP_ERROR_DIR
                    elif re.search(r'connect to X11 window server', logLine):
                        if debug:
                            print("X11 error")
                        errorDir = X11_ERROR_DIR
                    elif re.search(r'no lwjgl64 in java', logLine):
                        if debug:
                            print("missing lwjgl.")
                        errorDir = OTHER_ERROR_DIR
                    # elif re.search(r"Exception", logLine):
                    #     if debug:
                    #         print("Unknown exception!!!")
                    #     error_dir = OTHER_ERROR_DIR
                    if errorDir:
                        if debug:
                            print("\tline {}: {}".format(lineCounter, logLine))
                        break
            # p = relaunchMC(p, errorDir, recording_name, skip_path)
            time.sleep(1)
        logFile.close()

        if errorDir:
            print(errorDir)
            logError(errorDir, recording_name, skip_path, index)

        if notFound:
            try:
                os.remove(J(RECORDING_PATH[index], (recording_name + ".mcpr")))
            except:
                pass
            return 0

        video_path = None
        log_path = None
        marker_path = None

        # GET RECORDING
        list_of_files = glob.glob(J(RENDERED_VIDEO_PATH[index], '*.mp4'))
        if len(list_of_files) > 0:
            # Check that this render was created after we copied
            video_path = max(list_of_files, key=os.path.getmtime)
            if os.path.getmtime(video_path) < copy_time:
                if debug:
                    print("\tError! Rendered file is older than replay!")
                    print("\tskipping out of date rendering")
                video_path = None

        # GET UNIVERSAL ACTION FORMAT
        list_of_logs = glob.glob(J(RENDERED_LOG_PATH[index], '*.json'))
        if len(list_of_logs) > 0:
            # Check that this render was created after we copied
            log_path = max(list_of_logs, key=os.path.getmtime)
            if os.path.getmtime(log_path) < copy_time:
                if debug:
                    print("\tError! Rendered log is older than replay!")
                    print("\tskipping out of date action json")
                log_path = None

        # GET new markers.json
        list_of_logs = glob.glob(J(RENDERED_VIDEO_PATH[index], '*.json'))
        if len(list_of_logs) > 0:
            # Check that this render was created after we copied
            marker_path = max(list_of_logs, key=os.path.getmtime)
            if os.path.getmtime(marker_path) < copy_time:
                if debug:
                    print("\tError! markers.json is older than replay!")
                    print("\tskipping out of date markers.json")
                marker_path = None

        if video_path is not None and log_path is not None and marker_path is not None:
            if debug:
                print("\tCopying file", video_path, '==>\n\t', render_path,
                      'created', os.path.getmtime(video_path))
            shutil.move(video_path, J(render_path, 'recording.mp4'))

            if debug:
                print("\tCopying file", log_path, '==>\n\t', render_path,
                      'created', os.path.getmtime(log_path))
            shutil.move(log_path, J(render_path, 'univ.json'))

            if debug:
                print("\tRecording start and stop timestamp for video")
            metadata = json.load(open(J(render_path, 'stream_meta_data.json')))
            videoFilename = video_path.split('/')[-1]

            metadata['start_timestamp'] = int(videoFilename.split('_')[1])
            metadata['stop_timestamp'] = int(videoFilename.split('_')[2].split('-')[0])
            with open(marker_path) as markerFile:
                metadata['markers'] = json.load(markerFile)

            json.dump(metadata, open(J(render_path, 'stream_meta_data.json'), 'w'))
        else:
            if debug:
                print("\tMissing one or more file")
                print("\tSkipping this file in the future")
                print("\t{} {} {}".format(video_path, marker_path, log_path))
            logError(MISSING_RENDER_OUTPUT, recording_name, skip_path, index)
            try:
                os.remove(J(RECORDING_PATH[index], (recording_name + ".mcpr")))
            except:
                pass
            return 0

        # Remove the mcpr file from the recording dir
        try:
            os.remove(J(RECORDING_PATH[index], (recording_name + ".mcpr")))
        except:
            pass
    finally:
        if p is not None:
            try:
                p.wait(400)
            except (TimeoutError, subprocess.TimeoutExpired):
                p.kill()
                time.sleep(10)
    return 1