def create_target_parent_file(target_directory_path, source_file_path):
    """Rewrite a parent PHP source file and store it in the target directory.

    The source is read with every newline turned into a space, passed through
    the ``SourceReplacer`` rewrites (includes, requires, session handling and
    media references) and pretty-printed with BeautifulSoup. The output keeps
    the base name of the source file and is written only when no file with
    that name exists in the target directory yet.

    :param target_directory_path: directory that receives the rewritten file
    :param source_file_path: path of the PHP source file to rewrite
    """
    finder = ReferenceFinder()
    replacer = SourceReplacer()
    output_name = os.path.basename(source_file_path)

    with open(source_file_path) as source_file:
        code = source_file.read().replace("\n", " ")

    php_blocks = finder.get_php_occurrences(code)
    included = finder.get_included_php_file_paths(
        php_blocks, source_dir_path="", need_compl_path=False)
    required = finder.get_required_php_file_paths(
        php_blocks, source_dir_path="", need_compl_path=False)

    if len(included) > 0:
        code = replacer.replace_includes(code, included)
    if len(required) > 0:
        code = replacer.replace_requires(code, required)

    # --session related rewrites, applied in a fixed order
    session_rewrites = (
        replacer.replace_session_start,
        replacer.replace_session_details_assignment,
        replacer.replace_session_details_extraction,
    )
    for rewrite in session_rewrites:
        code = rewrite(code)

    code = replacer.replace_media_references(
        code, source_file_path,
        DetailsKeeperDO.get_source_dir_path(DetailsKeeperDO))
    pretty_code = BeautifulSoup(code, "html.parser").prettify()

    destination = target_directory_path + "/" + output_name
    if os.path.exists(destination):
        # --an already created target file is left untouched
        return
    with open(destination, "w+") as helper_file:
        helper_file.write(pretty_code)
def create_parent_article_file(file_path):
    """Turn a page that links to other pages into a registered article.

    Every hyperlink whose target is a known, non-avoided file is replaced by
    a link to ``index.php?option=com_content&view=article&id=<id>``, where the
    id is looked up under the name ``"Article <target file stem>"``. The
    session and media rewrites follow, includes are replaced when
    ``has_include`` reports 1, and the result is registered under the stem of
    ``file_path``.

    :param file_path: path of the source page to convert
    """
    replacer = SourceReplacer()
    finder = ReferenceFinder()
    file_maker = FilesDirectoryMaker()
    registry = ComponentRegistry()
    retriever = ComponentRetriever()

    page_source = SourceReader.get_source(file_path)
    links = finder.get_links_details(page_source)
    avoided_files = DetailsKeeperDO.get_avoided_file_list(DetailsKeeperDO)

    for link in links:
        linked_path = link[1]
        if "No File" in linked_path:
            continue
        if any(linked_path in avoided for avoided in avoided_files):
            continue

        linked_stem = os.path.splitext(os.path.basename(linked_path))[0]
        link_markup = link[2]
        markup_parts = link_markup.split(">")
        article_id = retriever.get_article_id("Article " + linked_stem)
        # NOTE(review): the href value is opened with a double quote that is
        # never closed before ">" - confirm against the shape of link[2].
        new_markup = (' href="index.php?option=com_content&view=article&id='
                      + str(article_id) + ">" + markup_parts[1])
        page_source = page_source.replace(link_markup, new_markup)

    page_source = replacer.replace_session_management(page_source)
    page_source = replacer.replace_media_references(
        page_source, file_path, "/opt/lampp/htdocs/Blog")
    if finder.has_include(file_path) == 1:
        page_source = file_maker.replace_main_file_includes(page_source)

    article_stem = os.path.splitext(os.path.basename(file_path))[0]
    registry.register_article(page_source, article_stem)
def create_article_file(self, source_file_path):
    """Prepare the source of a PHP file for use as an article.

    The file is read with newlines turned into spaces, ``require_once`` and
    ``require`` are renamed to ``include``, includes are replaced when
    ``has_include`` reports 1, and media and link references are rewritten.

    NOTE(review): the rewritten source is neither returned nor stored in the
    code visible here - confirm whether a final step is missing.

    :param source_file_path: path of the PHP source file
    """
    finder = ReferenceFinder()
    replacer = SourceReplacer()

    with open(source_file_path, "r") as source_file:
        code = source_file.read().replace("\n", " ")

    # --"require_once" has to be handled before the shorter "require"
    for keyword in ("require_once", "require"):
        code = code.replace(keyword, "include")

    if finder.has_include(source_file_path) == 1:
        code = self.replace_main_file_includes(code)

    code = replacer.replace_media_references(
        code, source_file_path, "/opt/lampp/htdocs/Blog/")
    code = replacer.replace_link_references(code)
def replace_main_file_includes(self, source_code):
    """Replace PHP include statements with ``{module <title>}`` tags.

    For every included PHP file found in ``source_code`` a module is created
    through ``create_main_module_file`` and the matching
    ``include "<file>";`` statement is replaced by
    ``?> {module <title>} <?php``.

    :param source_code: source text that may contain include statements
    :return: the source text with the include statements replaced
    """
    reference_finder = ReferenceFinder()
    php_occurrences = reference_finder.get_php_occurrences(source_code)
    source_dir_path = DetailsKeeperDO.get_source_dir_path(DetailsKeeperDO)
    complete_includes = reference_finder.get_included_php_file_paths(
        php_occurrences, source_dir_path, True)
    includes = reference_finder.get_included_php_file_paths(
        php_occurrences, source_dir_path, False)

    # --walk both lists in lockstep; they come from the same occurrences and
    # --are assumed to be parallel (full path / path as written). The
    # --previous index variable was never advanced, so every module replaced
    # --the first include statement only.
    for complete_include, include in zip(complete_includes, includes):
        module_title = self.create_main_module_file(complete_include)
        replace_string = "?> {module " + module_title + "} <?php"
        pattern = (r"include(\s*)(\"|\')" + re.escape(include)
                   + r"(\"|\')(\s*);")
        # --a callable replacement inserts the text literally, so a backslash
        # --in the module title is not read as a regex escape
        source_code = re.sub(
            pattern, lambda _match, text=replace_string: text, source_code)
    return source_code
def incl_tree(file_name, target_dir_path):
    """Walk the include tree below ``file_name`` and create the target files.

    Included files that exist on disk are recorded in the global ``file_tree``
    (child -> including file). A child for which ``has_include`` reports 1 is
    walked recursively; any other child is created with ``create_target_file``
    and counted in the global ``file_creation_map``. Once the counter of the
    including file equals its number of includes, the including file itself
    is created with ``create_target_parent_file`` and, unless it is the root
    returned by ``check_root()``, the counter of its own parent is increased.

    NOTE(review): the counter of the including file is read with a plain
    subscript, so a missing entry raises ``KeyError`` - confirm that this
    cannot happen for the trees being processed.

    :param file_name: path of the PHP file whose includes are walked
    :param target_dir_path: directory that receives the created files
    """
    global file_tree
    global file_creation_map

    finder = ReferenceFinder()
    with open(file_name) as source_file:
        source = source_file.read().replace("\n", " ")

    php_blocks = finder.get_php_occurrences(source)
    if len(php_blocks) == 0:
        return
    includes = finder.get_included_php_file_paths(
        php_blocks, DetailsKeeperDO.get_source_dir_path(DetailsKeeperDO),
        need_compl_path=True)
    if len(includes) == 0:
        return

    for include_file in includes:
        if not os.path.exists(str(include_file)):
            continue

        has_children = finder.has_include(include_file) == 1
        file_tree.update({include_file: file_name})
        if has_children:
            incl_tree(include_file, target_dir_path)
        else:
            create_target_file(target_dir_path, include_file)
            file_creation_map[file_name] = (
                file_creation_map.get(file_name, 0) + 1)

        senior_file = file_tree[include_file]
        if file_creation_map[senior_file] != len(includes):
            continue

        # --all includes of the senior file exist now, so it can be created
        create_target_parent_file(target_dir_path, senior_file)
        if senior_file != check_root():
            supreme_file = file_tree[senior_file]
            file_creation_map[supreme_file] = (
                file_creation_map.get(supreme_file, 0) + 1)
def create_child_articles(file_path):
    """Convert a page into an article and register it.

    The source goes through the style, session management and media
    reference rewrites, includes are replaced when ``has_include`` reports 1,
    and the result is registered under the file name without its extension.

    :param file_path: path of the source page to convert
    """
    replacer = SourceReplacer()
    finder = ReferenceFinder()
    file_maker = FilesDirectoryMaker()
    registry = ComponentRegistry()

    article_source = SourceReader.get_source(file_path)
    article_source = replacer.replace_styles(article_source)
    article_source = replacer.replace_session_management(article_source)
    article_source = replacer.replace_media_references(
        article_source, file_path, "/opt/lampp/htdocs/Blog")

    if finder.has_include(file_path) == 1:
        article_source = file_maker.replace_main_file_includes(article_source)

    # --send to database
    article_name, _extension = os.path.splitext(os.path.basename(file_path))
    registry.register_article(article_source, article_name)
def create_main_module_file(self, source_file_path):
    """Create the main file and directory skeleton of a module.

    The PHP source is read with newlines turned into spaces. Each included
    file is walked with ``incl_tree``, and the include, session management
    and media reference rewrites are applied. A module header is prepended
    and the pretty-printed result is written to
    ``<modules>/mod_<name>/mod_<file name>``. The ``language``,
    ``language/en-GB`` and ``tmpl`` directories and the module XML file are
    created afterwards, and the module is registered.

    NOTE(review): ``require`` statements are not rewritten here - confirm
    whether that is intended.

    :param source_file_path: path of the PHP file the module is built from
    :return: the value returned by ``ComponentRegistry.register_module``
    """
    reference_finder = ReferenceFinder()
    source_replacer = SourceReplacer()
    component_register = ComponentRegistry()

    base_name = os.path.basename(source_file_path)
    module_stem = os.path.splitext(base_name)[0]
    target_dir_path = ("/opt/lampp/htdocs/JoomlaResearchTest/modules/mod_"
                       + module_stem.lower())
    main_module_file_path = target_dir_path + "/mod_" + base_name.lower()
    source_dir_path = DetailsKeeperDO.get_source_dir_path(DetailsKeeperDO)

    with open(source_file_path, "r") as source_file:
        source_code = source_file.read().replace("\n", " ")

    php_occurrences = reference_finder.get_php_occurrences(source_code)
    complete_includes = list(reference_finder.get_included_php_file_paths(
        php_occurrences, source_dir_path, need_compl_path=True))
    includes = list(reference_finder.get_included_php_file_paths(
        php_occurrences, source_dir_path, need_compl_path=False))

    # --the include walk and the final write both put files into the module
    # --directory, so make sure it exists before either of them runs
    os.makedirs(target_dir_path, exist_ok=True)

    if complete_includes:
        for incl_file in complete_includes:
            incl_tree(incl_file, target_dir_path)
        source_code = source_replacer.replace_includes(source_code, includes)

    source_code = source_replacer.replace_session_management(source_code)
    source_code = source_replacer.replace_media_references(
        source_code, source_file_path, source_dir_path)

    main_module_file_header = (
        "<?php\n"
        "/***\n"
        "*@package Joomla.Site\n"
        "*@subpackage mod_" + module_stem + "\n"
        "*@license GNU/GPL, see LICENSE.php\n"
        "@copyright Copyright (C) 2005 - 2018, Open Source Matters, Inc. "
        "All rights reserved.\n"
        "***/\n"
        "// no direct access\n"
        "defined('_JEXEC') or die;"
        "?>"
    )
    source_code = BeautifulSoup(
        main_module_file_header + source_code, "html.parser")

    with open(main_module_file_path, "w+") as module_file:
        module_file.write(source_code.prettify())

    for sub_directory in ("/language", "/language/en-GB", "/tmpl"):
        self.create_target_directory(target_dir_path + sub_directory)
    self.create_module_xml_file(target_dir_path)

    title = component_register.register_module(
        "mod_" + module_stem.replace(" ", "_").lower())
    return title
def conversion_format_detector(file_name_list):
    """Label each file as ``article`` or ``separate``.

    A file is an ``article`` when it holds more text than its HTML header
    plus its PHP code; otherwise it is labelled ``separate``. The header is
    the span from ``<!DOCTYPE html>`` (or, when that is absent, ``<html>``)
    up to and including the first ``</head>``. When the closing ``</head>``
    is missing the header length is 0.

    :param file_name_list: paths of the files to inspect
    :return: list of ``"article : <path>"`` / ``"separate : <path>"`` strings,
        in the order of ``file_name_list``
    """
    file_details = []
    for file_name in file_name_list:
        with open(file_name, "r") as altered_source:
            source = altered_source.read().replace("\n", " ")

        php_occurrences = ReferenceFinder().get_php_occurrences(source)

        # --if there is a html header, get the length of the header part
        if "<!DOCTYPE html>" in source:
            header_pattern = r"<!DOCTYPE html>.*?</head>"
        elif "<html>" in source:
            header_pattern = r"<html>.*?</head>"
        else:
            header_pattern = None

        header_match = None
        if header_pattern is not None:
            header_match = re.search(header_pattern, source)
        # --the whole match is measured: opening marker, content and </head>.
        # --(the old two-group findall returned tuples, so the <html> branch
        # --always measured a length of 2, and a missing </head> raised)
        header_length = len(header_match.group(0)) if header_match else 0

        # --get the total length of php codes
        total_php_length = sum(
            len(occurrence) + len("<?php?>") for occurrence in php_occurrences)

        # --a file with a html header also carries the body/html wrapper tags
        if header_length > 0:
            total_php_length += len("<body></body></html>")

        # --check whether the file contains more than php codes
        if len(source) - header_length > total_php_length:
            file_details.append("article : " + file_name)
        else:
            file_details.append("separate : " + file_name)
    return file_details
def create_article(self, source_file_path):
    """Convert a source file into an article and add a menu item for it.

    The source goes through the style rewrite, the include replacement (when
    ``has_include`` reports 1) and the media and link reference rewrites.
    It is then registered under the file name without extension, and a menu
    item titled ``"Article <name>"`` is registered for the article id.

    NOTE(review): the id is looked up with a lowercase ``"article "`` prefix
    while the menu item uses ``"Article "`` - confirm the lookup is not case
    sensitive.

    :param source_file_path: path of the source file to convert
    """
    finder = ReferenceFinder()
    replacer = SourceReplacer()
    file_maker = FilesDirectoryMaker()
    registry = ComponentRegistry()
    retriever = ComponentRetriever()

    article_name = os.path.splitext(os.path.basename(source_file_path))[0]

    article_source = SourceReader.get_source(source_file_path)
    article_source = replacer.replace_styles(article_source)
    if finder.has_include(source_file_path) == 1:
        article_source = file_maker.replace_main_file_includes(article_source)
    article_source = replacer.replace_media_references(
        article_source, source_file_path, "/opt/lampp/htdocs/Blog/")
    article_source = replacer.replace_link_references(article_source)

    registry.register_article(article_source, article_name)
    article_id = retriever.get_article_id("article " + article_name)
    registry.register_menu_item(article_id, "Article " + article_name)
import os

from codeparser.CodeFormatter import format_code
from filehandler.connection_creator import ConnectionCreator
from filehandler.files_dir_maker import FilesDirectoryMaker
from filehandler.file_detector import FileDetector
from filehandler.FileParser import conversion_format_detector
from stringfinder.header_remover import remove_html_header
from stringfinder.LoopDetector import LoopDetector
from stringfinder.reference_finder import ReferenceFinder

# --helper objects used by the script
file_detector = FileDetector()
reference_finder = ReferenceFinder()
loop_detector = LoopDetector()
dir_maker = FilesDirectoryMaker()
con_creator = ConnectionCreator()

# --base path of the source files (user input)
baseSourcePath = "/home/shan/Developments/Projects/research-devs/Blog"

# --tables which contain the user details (user input)
user_tables = ["user_login", "userimage"]

# --collect every source file below the base source path
file_list = file_detector.get_source_file_paths(baseSourcePath)

# --detect the access files such as login, signup and database connections
access_file_list = file_detector.get_user_management_file_paths(
    file_list, user_tables)
database_connect_file_path = file_detector.get_database_connect_file_name(
    file_list)
database_connect_file = " "
# NOTE(review): this excerpt relies on access_rights, fileNameBase, text and
# i, none of which are defined in the visible code; the final loop also
# appears to continue beyond this excerpt - confirm against the full script.

# --create the target directory for all php files
targetPhpDirPath = "/home/shan/Developments/Projects/research-devs/python-devs/filehandler/phpSnippets"
if not os.path.exists(targetPhpDirPath):
    os.mkdir(targetPhpDirPath, access_rights)

# --create the target directory for the altered source code
targetAlteredSrcDirPath = "/home/shan/Developments/Projects/research-devs/python-devs/filehandler/alteredSrc"
if not os.path.exists(targetAlteredSrcDirPath):
    os.mkdir(targetAlteredSrcDirPath, access_rights)

# --directory that holds the php snippets of the current source file
targetFileDirPath = "/home/shan/Developments/Projects/research-devs/python-devs/filehandler/phpSnippets/" + \
                    fileNameBase

# occurrences = re.findall('<\?php(.*?)\?>', text)
php_occurrences = ReferenceFinder.get_php_occurrences(
    ReferenceFinder, text)
included_files = ReferenceFinder.get_included_php_file_paths(
    ReferenceFinder, php_occurrences, need_compl_path=False)
required_files = ReferenceFinder.get_required_php_file_paths(
    ReferenceFinder, php_occurrences, need_compl_path=False)

if len(php_occurrences) > 0:
    # --create the directory for the current php source file
    if not os.path.exists(targetFileDirPath):
        os.mkdir(targetFileDirPath, access_rights)

    if len(included_files) == 0 and len(required_files) == 0:
        alteredPhp = text
        for occurrence in php_occurrences:
            # --file name of the target php file
            fileName = targetFileDirPath + "/" + fileNameBase + "_php_part_" + str(
                i) + ".php"
def create_nested_hyperlinked_articles(file_path):
    """Create articles for the pages reachable through hyperlinks.

    Every link of ``file_path`` is recorded in the global ``file_tree``
    (linked file -> linking file) unless the target is on the avoided list.
    A target that has hyperlinks of its own is processed recursively; a
    target without hyperlinks is created with ``create_child_articles`` and
    counted in the global ``article_creation_tree``. A link whose target
    contains ``"No File"`` (an external reference, per the original comment)
    is only recorded and counted. Once the counter of the linking file equals
    its number of links, the linking file is created with
    ``create_parent_article_file`` and, unless it is the root returned by
    ``check_root()``, the counter of its own parent is increased.

    NOTE(review): the counter of the linking file is read with a plain
    subscript, so a missing entry raises ``KeyError`` - confirm that this
    cannot happen for the link structures being processed.

    :param file_path: path of the page whose hyperlinks are followed
    """
    global article_creation_tree
    global file_tree
    global created_article_details

    finder = ReferenceFinder()
    avoided_files = DetailsKeeperDO.get_avoided_file_list(DetailsKeeperDO)
    page_source = SourceReader.get_source(file_path)
    links = finder.get_links_details(page_source)
    if len(links) == 0:
        return

    for link in links:
        linked_path = link[1]

        if "No File" in linked_path:
            # --external reference: record and count it, nothing to create
            file_tree.update({linked_path: file_path})
            article_creation_tree[file_path] = (
                article_creation_tree.get(file_path, 0) + 1)
            continue

        if any(linked_path in avoided for avoided in avoided_files):
            # --avoided files are skipped without being counted
            continue

        linked_source = SourceReader.get_source(linked_path)
        child_links = finder.get_links_details(linked_source)
        file_tree.update({linked_path: file_path})
        if len(child_links) > 0:
            create_nested_hyperlinked_articles(linked_path)
        else:
            create_child_articles(linked_path)
            article_creation_tree[file_path] = (
                article_creation_tree.get(file_path, 0) + 1)

        parent_file = file_tree[linked_path]
        if article_creation_tree[parent_file] != len(links):
            continue

        # --every link of the parent is handled, so the parent can be created
        create_parent_article_file(parent_file)
        if parent_file != check_root():
            elder_file = file_tree[parent_file]
            article_creation_tree[elder_file] = (
                article_creation_tree.get(elder_file, 0) + 1)
# print(file)
# print(file_list.__len__())

# from filehandler.file_migrator import FileMigrator
#
# file_migrator = FileMigrator()
# file_migrator.move_media_files("/opt/lampp/htdocs/Blog/Frontend/frontend.php", "/opt/lampp/htdocs/Blog")

from filehandler.file_detector import FileDetector
from stringfinder.navigation_links_detector import NavigationLinksDetector
from stringfinder.reference_finder import ReferenceFinder
from stringfinder.include_tree_detector import incl_tree

file_detector = FileDetector()
nav_link_detector = NavigationLinksDetector()
reference_finder = ReferenceFinder()

# --every source file below the blog root
file_list = file_detector.get_source_file_paths("/opt/lampp/htdocs/Blog")

# --alternative set of navigation files, kept for reference:
# navigation_file_list = ["/opt/lampp/htdocs/Blog/Template/Navigation/frontend_navigation.php",
#                         "/opt/lampp/htdocs/Blog/Template/Navigation/profile_navigation.php",
#                         "/opt/lampp/htdocs/Blog/Template/Navigation/profile_navigation_home.php"]
navigation_file_list = ["/opt/lampp/htdocs/Blog/Template/Navigation/frontend_navigation.php"]

nav_file_details = file_detector.get_navigation_files_details(navigation_file_list)
nav_details = nav_link_detector.get_navigation_links(nav_file_details)
nav_links = nav_link_detector.get_navigated_file_paths(nav_details, file_list)

# --print each navigation file followed by the files it navigates to
separator = "==============="
for nav in nav_links:
    print(separator)
    print(nav.get_nav_file_name())
    print(separator)
    for det in nav.get_navigations():
        has_incl = reference_finder.has_include(det)
        print(det)