def process_mirror(file):
    """Locate where the audio content starts and ends in *file* and
    mirror the spectrum samples around the midpoint of that region."""
    spectrum = np.fft.rfft(open_file(file))
    result = np.copy(spectrum)
    threshold = 1000
    # First index whose imaginary part escapes the +/-threshold band:
    # everything before it is considered silence/noise.
    lo = 0
    while abs(np.imag(result[lo])) <= threshold:
        lo += 1
    # Last such index, scanning backwards from the end.
    hi = len(result) - 1
    while abs(np.imag(result[hi])) <= threshold:
        hi -= 1
    # Swap sample pairs sitting at equal distance from the midpoint.
    half = abs(hi - lo) // 2
    for offset in range(1, half):
        left = lo + (half - offset)
        right = lo + (half + offset)
        result[left], result[right] = result[right], result[left]
    return np.fft.irfft(result)
def process_pitch_shift(file):
    """Detect by how much the audio spectrum was shifted and undo the shift."""
    data = np.fft.rfft(open_file(file))
    # Walk forward until the imaginary part leaves the +/-500 band;
    # that index is the amount the content was shifted by.
    shiftAmount = 0
    while -500 <= np.imag(data[shiftAmount]) <= 500:
        shiftAmount += 1
    print("Found ! The message has been shifted by ", shiftAmount)
    # Deep copy so the tail beyond the shifted region keeps its values.
    result = np.copy(data)
    M = len(data)
    # Move every bin down by shiftAmount in one slice assignment.
    result[:M - shiftAmount] = data[shiftAmount:]
    return np.fft.irfft(result)
def build(config):
    """Build function

    Assembles the target XML application file from the source tree:
    writes the XML prologue, then each section in a fixed order, and
    closes the document and the output handle.
    """
    # Shared with the write_* helpers, which append to this handle.
    global OUTPUT_IO
    global OBJS
    if not os.path.isdir(config["source"]):
        ERROR("Can't find %s", config["source"])
        return
    OUTPUT_IO = open_file(config["target"]["path"], "wb")
    OUTPUT_IO.write(
        """<?xml version="1.0" encoding="utf-8"?>\n"""
        """<Application>\n"""
    )
    # Registry of object GUIDs already emitted (walk() uses it to
    # detect and skip duplicates).
    OBJS = {}
    write_app_info(config)
    write_pages(config)
    write_e2vdom(config)
    write_libraries(config)
    # write_structure(config)
    # Structure/Backupfiles are written as empty nodes instead.
    OUTPUT_IO.write(""" <Structure/>\n""")
    OUTPUT_IO.write(""" <Backupfiles/>\n""")
    write_resources(config)
    write_databases(config)
    write_security(config)
    OUTPUT_IO.write("</Application>")
    OUTPUT_IO.close()
def write_libraries(config):
    """Serialize every library source file into the <Libraries> XML node."""
    INFO("Libraries Data: Processing...")
    libs_path = os.path.join(config["source"], constants.LIBRARIES_FOLDER)
    if not os.path.exists(libs_path):
        CRITICAL("Can't find: {}".format(libs_path))
        emergency_exit()
    write_xml("Libraries", indent=2)
    # Reserved bookkeeping files are excluded from the listing.
    candidates = set(os.listdir(libs_path)) - set(constants.RESERVED_NAMES)
    for lib_name in sorted(candidates):
        lib_path = os.path.join(libs_path, lib_name)
        if not os.path.isfile(lib_path):
            continue
        DEBUG("Open file: %s", lib_path)
        with open_file(lib_path) as handle:
            write_xml(
                tagname="Library",
                attrs={"Name": lib_name.split(".", 1)[0]},
                indent=4,
                data=handle.read(),
                close=True
            )
    write_xml("Libraries", indent=2, closing=True)
    INFO("Libraries Data: Done!")
def write_object(path, name, indent):
    """Write one <Object> XML node described by the JSON file *name* in *path*."""
    with open_file(os.path.join(path, name)) as obj_file:
        obj_json = json_load(obj_file, critical=True)
    # Objects of an external-source type keep their code in a separate
    # file: inline its cleaned content and drop the file-name attribute.
    if "Type" in obj_json["attrs"] \
            and obj_json["attrs"]["Type"] in constants.EXTERNAL_SOURCE_TYPES \
            and "source_file_name" in obj_json["attrs"]:
        source_file_name = obj_json["attrs"]["source_file_name"]
        del obj_json["attrs"]["source_file_name"]
        with open_file(os.path.join(path, source_file_name)) as source_file:
            # NOTE(review): the flag is read from "attrs" but the source is
            # stored under "attributes" — presumably "attrs" become XML
            # attributes and "attributes" become child nodes; confirm
            # against the JSON schema used by the exporter.
            obj_json["attributes"]["source"] = clean_data(source_file.read()).decode('utf-8')
    write_xml("Object", attrs=obj_json["attrs"], indent=indent)
    # Empty placeholder children expected by the consumer.
    write_xml("Actions", indent=indent+2, data="", close=True)
    write_xml("Objects", indent=indent+2, data="", close=True)
    write_attributes(obj_json["attributes"], indent+2)
    write_xml("Object", indent=indent, closing=True)
def write_file(self, name, data):
    """Write data to file

    Writes *data* to file *name* under the current path; unicode text
    is encoded as UTF-8, byte strings are written as-is.
    """
    path = build_path(self.current_path(), name)
    DEBUG("Writing data to %s", path)
    with open_file(path, "wb") as hdlr:
        # isinstance (not type ==) so unicode subclasses are encoded too.
        hdlr.write(data.encode('utf-8') if isinstance(data, unicode) else data)
def parse_one_script(path, filename, output_path): all_html = h.open_file(filename, path) soup = BeautifulSoup(all_html, 'html.parser') try: script = soup.find_all("pre")[0] h.write_to_file(str(script), filename, output_path) return None except: print "Error: no <pre> in", filename return filename
def write_json_file(self, name, data):
    """Convert data to JSON and write it to file

    Serializes *data* into file *name* under the current path.
    """
    target = build_path(self.current_path(), name)
    DEBUG("Writing JSON data to %s", target)
    with open_file(target, "wb") as out:
        json_dump(data, out, critical=True)
def write_actions(path, indent):
    """Write the <Actions> node for every action script found in *path*."""
    actions_map_path = os.path.join(path, constants.MAP_FILE)
    if not os.path.exists(actions_map_path):
        # No map file: emit an empty Actions node and bail out.
        INFO("Can't find: %s; skipping Actions", actions_map_path)
        write_xml("Actions", indent=indent)
        write_xml("Actions", indent=indent, closing=True)
        return
    with open_file(actions_map_path) as mapping_file:
        actions_map = json_load(mapping_file, critical=True)
    write_xml("Actions", indent=indent)
    for entry in sorted(os.listdir(path)):
        entry_path = os.path.join(path, entry)
        if not os.path.isfile(entry_path) or entry in constants.RESERVED_NAMES:
            continue
        attrs = actions_map.get(entry)
        if not attrs:
            # Action missing from the map: synthesize default attributes.
            attrs = {
                "Top": "",
                "State": "",
                "Left": "",
                "ID": str(gen_guid()),
                "Name": entry.split(".", 1)[0],
            }
        with open_file(entry_path) as handle:
            write_xml(
                tagname="Action",
                attrs=attrs,
                indent=indent + 2,
                data=handle.read(),
                close=True,
                force_cdata=True
            )
    write_xml("Actions", indent=indent, closing=True)
def write_resources(config):
    """Emit the <Resources> section: every file in the resources folder,
    base64-encoded, with ID/Name/Type parsed from its file name."""
    INFO("Resources Data: Processing...")
    resources_path = os.path.join(config["source"], constants.RESOURCES_FOLDER)
    if not os.path.exists(resources_path):
        # Resources folder is mandatory — abort the whole build.
        CRITICAL("Can't find: {}".format(resources_path))
        emergency_exit()
    write_xml("Resources", indent=2)
    files = list(set(os.listdir(resources_path)) - set(constants.RESERVED_NAMES))
    for res_name in sorted(files):
        res_path = os.path.join(resources_path, res_name)
        if not os.path.isfile(res_path):
            continue
        # File names are expected as "<guid>_<type>_<name>".
        # NOTE(review): when the GUID parses but the name has fewer than
        # two underscores, raw_name[2] below raises IndexError — confirm
        # resource names always follow the 3-part convention.
        raw_name = res_name.split("_", 2)
        try:
            res_guid = UUID(raw_name[0])
        except ValueError:
            # No GUID prefix: generate one and fall back to the file
            # extension for the type; the full file name stays the Name.
            res_guid = gen_guid()
            res_type = res_name.rsplit(".", 1)
            res_type = res_type[1] if len(res_type) == 2 else "res"
        else:
            res_type = raw_name[1]
            res_name = raw_name[2]
        attrs = {
            "ID": res_guid,
            "Name": res_name,
            "Type": res_type
        }
        DEBUG("Open file: %s", res_path)
        with open_file(res_path) as res_f:
            write_xml(
                tagname="Resource",
                attrs=attrs,
                indent=4,
                data=base64.b64encode(res_f.read()),
                close=True
            )
    write_xml("Resources", indent=2, closing=True)
    INFO("Resources Data: Done!")
def write_databases(config):
    """Emit the <Databases> section: each database file base64-encoded,
    with ID/Name/Type parsed from the "<guid>_<name>.<type>" file name."""
    INFO("Databases Data: Processing...")
    dbs_path = os.path.join(config["source"], constants.DATABASES_FOLDER)
    if not os.path.exists(dbs_path):
        # Databases folder is optional; skip the section entirely.
        DEBUG("Can't find: {}".format(dbs_path))
        return
    write_xml("Databases", indent=2)
    candidates = set(os.listdir(dbs_path)) - set(constants.RESERVED_NAMES)
    for db_name in sorted(candidates):
        db_path = os.path.join(dbs_path, db_name)
        if not os.path.isfile(db_path):
            continue
        parts = db_name.split("_", 1)
        try:
            db_guid = UUID(parts[0])
        except ValueError:
            # No GUID prefix — make one up; parts[-1] is still the full name.
            db_guid = gen_guid()
        stem_and_ext = parts[-1].split(".", 1)
        db_name = stem_and_ext[0]
        db_type = stem_and_ext[1] if len(stem_and_ext) == 2 else "sqlite"
        attrs = {
            "ID": db_guid,
            "Name": db_name,
            "Type": db_type
        }
        DEBUG("Open file: %s", db_path)
        with open_file(db_path) as handle:
            write_xml(
                tagname="Database",
                attrs=attrs,
                indent=4,
                data=base64.b64encode(handle.read()),
                close=True
            )
    write_xml("Databases", indent=2, closing=True)
    INFO("Databases Data: Done!")
def write_app_info(config):
    """Write the <Information> node from the application's info JSON file."""
    INFO("Application Information Data: Processing...")
    info_path = os.path.join(config["source"], constants.INFO_FILE)
    with open_file(info_path) as handle:
        info = json_load(handle, critical=True)
    write_xml("Information", indent=2)
    # Each top-level JSON key becomes one child tag.
    for tagname, value in info.items():
        write_xml(tagname, data=value, close=True, indent=4)
    write_xml("Information", indent=2, closing=True)
    INFO("Application Information Data: Done!")
def parse_script_list(filename, path):
    """Collect cleaned script link names from a script-index HTML page.

    Finds every anchor whose href mentions "Script", strips the site
    path prefix and normalizes the remainder into a file-name-safe form.
    Returns the list of cleaned link names.
    """
    all_html = h.open_file(filename, path)
    soup = BeautifulSoup(all_html, 'html.parser')
    all_links = soup.find_all(href=re.compile("Script"))
    # The first five anchors are site navigation, not script links;
    # "[5:]" replaces the redundant "[5:len(all_links)]".
    all_hrefs = [a.get("href") for a in all_links[5:]]
    actual_links = []
    for raw_link in all_hrefs:
        # Keep only the part after the scripts directory, then normalize.
        link = raw_link.partition("/Movie Scripts/")[2]
        link = link.replace(" ", "-")
        link = link.replace("-Script", "")
        link = link.replace("..", ".")
        link = link.replace(":", "")
        actual_links.append(link)
    return actual_links
def rename_all_scripts():
    """Match cleaned script files against dataset movie titles and copy
    each exact match under its canonical title."""
    movies = h.read_from_dataset("imdb_dataset_v7.1_6_actors_complete.tsv")
    scripts_no_tag_path = "clean_scripts/no_tags/"
    clean_scripts_path = "clean_scripts/finished_renamed/"
    script_names = os.listdir(scripts_no_tag_path)
    for script in script_names:
        # Turn the file name back into a title: drop extension, restore spaces.
        temp = script.replace(".html","").replace("-", " ")
        # Move a trailing ", The" back to the front ("Matrix, The" -> "The Matrix").
        if re.search(r'\b, The\b', temp):
            temp = temp.replace(",", "")
            temp = temp.partition(" The")[0]
            temp = "The " + temp
        for title in movies:
            # Compare against the title with its "(year)" suffix removed.
            # jac >= 1 effectively means the token sets match exactly.
            jac = h.compute_jaccard_index(title.partition(" (")[0],temp)
            if jac >= 1:
                print jac, title, temp
                # "/" in titles would otherwise create subdirectories.
                h.write_to_file(h.open_file(script, scripts_no_tag_path), title.replace("/","."), clean_scripts_path)
def write_structure(config):
    """Write the <Structure> node from the structure JSON file, or an
    empty node when the file is missing."""
    INFO("Structure Data: Processing...")
    structure_path = os.path.join(config["source"], constants.STRUCT_FILE)
    if not os.path.exists(structure_path):
        ERROR("Can't find: {}".format(structure_path))
        write_xml("Structure", indent=2, close=True)
        return
    write_xml("Structure", indent=2)
    with open_file(structure_path) as handle:
        entries = json_load(handle, critical=True)
    # Each JSON entry is a flat attribute mapping for one <Object/>.
    for entry in entries:
        write_xml("Object", attrs=entry, data="", close=True, indent=4)
    write_xml("Structure", indent=2, closing=True)
    INFO("Structure Data: Done!")
def write_e2vdom(config):
    """Gather every page's E2VDOM events/actions JSON and emit the
    <E2vdom> section with its <Events> and <Actions> subtrees."""
    INFO("E2VDOM Data: Processing...")
    write_xml("E2vdom", indent=2)
    pages_path = os.path.join(config["source"], constants.PAGES_FOLDER)
    events = []
    actions = []
    for page_name in os.listdir(pages_path):
        e2vdom_path = os.path.join(pages_path, page_name, constants.E2VDOM_FILE)
        if not os.path.exists(e2vdom_path):
            INFO("No file %s; skipping E2VDOM for %s", e2vdom_path, page_name)
            continue
        DEBUG("Open file: %s", e2vdom_path)
        with open_file(e2vdom_path) as handle:
            payload = json_load(handle, critical=True)
        events.extend(payload["events"])
        actions.extend(payload["actions"])
    INFO("E2VDOM Data: Writing events")
    write_xml("Events", indent=4)
    for event in events:
        # Linked action IDs are emitted as children, not attributes.
        linked_ids = event.pop("actions", [])
        write_xml("Event", attrs=event, indent=6)
        for action_id in linked_ids:
            write_xml(
                "Action", attrs={"ID": action_id},
                indent=8, data="", close=True
            )
        write_xml("Event", indent=6, closing=True)
    write_xml("Events", indent=4, closing=True)
    INFO("E2VDOM Data: Events done!")
    INFO("E2VDOM Data: Writing actions")
    write_xml("Actions", indent=4)
    for action in actions:
        # Params are (script-name, value) pairs emitted as child nodes.
        params = action.pop("Params", [])
        write_xml("Action", attrs=action, indent=6)
        for script_name, value in params:
            write_xml(
                "Parameter", attrs={"ScriptName": script_name},
                indent=8, data=value, close=True
            )
        write_xml("Action", indent=6, closing=True)
    write_xml("Actions", indent=4, closing=True)
    write_xml("E2vdom", indent=2, closing=True)
    INFO("E2VDOM Data: Actions done!")
    INFO("E2VDOM Data: Done!")
def walk(path, name, indent):
    """Recursively write the <Object> tree rooted at folder *name* under *path*."""
    new_path = os.path.join(path, name)
    actions_folder = "Actions-{}".format(name)
    info_path = os.path.join(new_path, constants.INFO_FILE)
    if not os.path.exists(info_path):
        CRITICAL("Can't find: {}".format(info_path))
        emergency_exit()
    with open_file(info_path) as info_file:
        info_json = json_load(info_file, critical=True)
    attrs = info_json["attrs"]
    # Skip any object whose GUID was already written (global OBJS registry).
    if attrs is not None and 'ID' in attrs:
        id = attrs['ID']
        if id in OBJS:
            ERROR("Encountered duplicate GUID: {duplicate} duplicates {origin}: Ignoring {duplicate}".format(
                duplicate=name,
                origin=OBJS[id]
            ))
            return
        else:
            OBJS[id] = name
    write_xml("Object", attrs=attrs, indent=indent)
    write_actions(os.path.join(new_path, actions_folder), indent+2)
    write_xml("Objects", indent=indent+2)
    # Optional ordering file: maps lower-cased child names to positions.
    childs_order_path = os.path.join(new_path, constants.CHILDS_ORDER)
    if os.path.exists(childs_order_path):
        with open(childs_order_path) as f:
            names = json_load(f, default=[], critical=False)
            names = map(lambda s: s.lower(), names)
            childs_order = dict(zip(names, xrange(len(names))))
    else:
        childs_order = {}
    # Children absent from the order file sort after all ordered ones.
    max_value = len(childs_order) + 1
    def key_func(name):
        # Order key looked up without the ".json" extension; ties broken by name.
        key = name.lower()
        if key.endswith('.json'):
            key = key[:-5]
        return [childs_order.get(key, max_value), name]
    nodes = list(set(os.listdir(new_path)) - set(constants.RESERVED_NAMES) - {actions_folder})
    nodes = [node for node in nodes if not constants.RESERVED_NAMES_REGEXP.match(node)]
    ordered_nodes = sorted(nodes, key=key_func)
    # Recurse into sub-folders; leaf files become <Object> nodes directly.
    for name in ordered_nodes:
        if os.path.isdir(os.path.join(new_path, name)):
            walk(new_path, name, indent+4)
        else:
            write_object(new_path, name, indent+4)
    write_xml("Objects", indent=indent+2, closing=True)
    write_attributes(info_json["attributes"], indent+2)
    write_xml("Object", indent=indent, closing=True)
def clean_one_script_from_tags(path, filename, output_path):
    """Strip HTML tags from one script file and save the plain text."""
    raw_html = h.open_file(filename, path)
    stripped = ts.strip_tags(raw_html)
    h.write_to_file(stripped, filename, output_path)
def open_wav(name):
    """Open the wave file called *name* from the INPUT folder."""
    wav_path = "{}/{}.wav".format(INPUT, name)
    return open_file(wav_path)
def write_security(config):
    """Emit the <Security> section: users/groups from the users-groups
    JSON file plus the base64-encoded LDAP LDIF dump."""
    INFO("Security Data: Processing...")
    security_path = os.path.join(config["source"], constants.SECURITY_FOLDER)
    if not os.path.exists(security_path):
        # Security folder is optional — the whole section is skipped.
        INFO("Can't find: {}".format(security_path))
        return
    groups_and_users_path = \
        os.path.join(security_path, constants.USERS_GROUPS_FILE)
    if os.path.exists(groups_and_users_path):
        with open_file(groups_and_users_path) as ug_file:
            ug_json = json_load(ug_file, critical=True)
    else:
        ug_json = {}
    write_xml("Security", indent=2)
    # Groups node is always written empty here.
    write_xml("Groups", indent=4, close=True)
    write_xml("Users", indent=4)
    INFO("Security Data: Writing users")
    for user in ug_json.get("users", []):
        write_xml("User", indent=6)
        for key, value in user.items():
            if key == "Rights":
                # Rights is a list of attribute mappings, nested as child nodes.
                write_xml("Rights", indent=8)
                for right in value:
                    write_xml(
                        "Right",
                        attrs=right,
                        indent=10,
                        close=True
                    )
                write_xml("Rights", indent=8, closing=True)
            else:
                # Every other user field becomes a CDATA-wrapped tag.
                write_xml(
                    key,
                    data=value,
                    indent=8,
                    close=True,
                    force_cdata=True
                )
        write_xml("User", indent=6, closing=True)
    write_xml("Users", indent=4, closing=True)
    INFO("Security Data: Users done!")
    INFO("Security Data: Writing LDAP")
    ldap_path = os.path.join(security_path, constants.LDAP_LDIF)
    if os.path.exists(ldap_path):
        with open_file(ldap_path) as ldap_file:
            write_xml(
                "LDAP",
                indent=4,
                data=base64.b64encode(ldap_file.read()),
                close=True
            )
    else:
        # Missing LDIF dump: still emit the node, just empty.
        write_xml("LDAP", indent=4, data="", close=True)
    write_xml("Security", indent=2, closing=True)
    INFO("Security Data: Done!")