def download_site(self):
    # do not download twice if not forced
    if self.skip_download:
        files = self.config.existing_files
        file_path = files[-1]
        logging.info("%s - ZIP already downloaded %sx. Last one is %s",
                     self.site, len(files), file_path)
        Tracer.write_row(site=self.site, step="download", status="OK")
        return file_path

    # start a timer to measure execution time
    start_time = timeit.default_timer()

    # make the query. session.post blocks until the ZIP has been generated on the Jahia site.
    logging.info("%s - Downloading %s...", self.site, self.config.file_name)
    response = self.session_handler.session.post(
        self.config.file_url,
        params=self.config.download_params,
        stream=True)
    logging.debug("%s - %s => %s", self.site, response.url, response.status_code)

    # when we arrive here, the Jahia ZIP file is ready to be downloaded

    # raise an exception in case of error
    if response.status_code != requests.codes.ok:
        response.raise_for_status()

    logging.debug("%s - Headers %s", self.site, response.headers)

    # stream the file to disk in chunks
    logging.info("%s - Saving response into %s...", self.site, self.config.file_path)
    with open(self.config.file_path, 'wb') as output:
        for chunk in response.iter_content(chunk_size=4096):
            if chunk:
                output.write(chunk)
                output.flush()

    zip_stats = os.stat(self.config.file_path)
    if zip_stats.st_size < 200:
        logging.error("The Jahia ZIP file for the WordPress site is empty")
        raise Exception("Jahia ZIP is empty")

    # log execution time and return the path to the downloaded file
    elapsed = timedelta(seconds=timeit.default_timer() - start_time)
    logging.info("%s - File downloaded in %s", self.site, elapsed)
    Tracer.write_row(site=self.site, step="download", status="OK")

    # return the PosixPath converted to string
    return str(self.config.file_path)
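# A minimal standalone sketch of the streaming-download pattern used in
# download_site() above. The URL and output filename are placeholders, not
# values from this project.
import requests

url = 'https://example.com/export.zip'  # placeholder endpoint

response = requests.post(url, stream=True)
response.raise_for_status()

with open('export.zip', 'wb') as output:
    # iterate over the body in 4 KiB chunks so the whole ZIP is never held
    # in memory at once
    for chunk in response.iter_content(chunk_size=4096):
        if chunk:
            output.write(chunk)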
def parse(site, output_dir=None, use_cache=False, **kwargs):
    """ Parse the given site. """
    try:
        # Without raising this limit, the following sites crash when they are
        # dumped to disk with pickle: biorob, disopt, euler, last,
        # master-architecture. They are probably corrupted, so this is simply
        # a hack to make it work.
        sys.setrecursionlimit(2000)

        # create a subdir in output_dir
        site_dir = unzip(site, output_dir=output_dir)

        # where to cache our parsing
        pickle_file_path = os.path.join(site_dir, 'parsed_{}.pkl'.format(site))

        # when using the cache: check if the site was already parsed
        pickle_site = False
        if use_cache and os.path.exists(pickle_file_path):
            with open(pickle_file_path, 'rb') as pickle_content:
                pickle_site = pickle.load(pickle_content)
            logging.info("Using the cached pickle file at %s", pickle_file_path)

        logging.info("Parsing Jahia xml files from %s...", site_dir)
        if pickle_site:
            site = pickle_site
        else:
            logging.info("Cache not used, parsing the Site")
            site = Site(site_dir, site, fix_problematic_chars=True)

        print(site.report)

        # always save the parsed data on disk, so we can use the cache
        # later if we want
        with open(pickle_file_path, 'wb') as output:
            logging.info("Parsed site saved into %s", pickle_file_path)
            pickle.dump(site, output, pickle.HIGHEST_PROTOCOL)

        # log success
        logging.info("Site %s successfully parsed", site)
        Tracer.write_row(site=site.name, step="parse", status="OK")

        return site
    except Exception as err:
        logging.error("%s - parse - Exception: %s", site, err)
        raise err
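# A minimal sketch of the parse-then-cache pattern above, with a hypothetical
# load_or_parse() helper; only the pickle and recursion-limit details are
# taken from the real code.
import os
import pickle
import sys

def load_or_parse(pickle_path, parse_fn, use_cache=True):
    """Return the cached object if present, otherwise parse and cache it."""
    if use_cache and os.path.exists(pickle_path):
        with open(pickle_path, 'rb') as fh:
            return pickle.load(fh)
    # deeply nested objects can exceed the default recursion limit when pickled
    sys.setrecursionlimit(2000)
    obj = parse_fn()
    with open(pickle_path, 'wb') as fh:
        pickle.dump(obj, fh, pickle.HIGHEST_PROTOCOL)
    return obj

# usage with a trivial stand-in for the Site parser
site = load_or_parse('/tmp/parsed_demo.pkl', lambda: {'name': 'demo'})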
def export_many(csv_file, output_dir=None, admin_password=None, use_cache=None,
                keep_extracted_files=False, **kwargs):

    rows = Utils.csv_filepath_to_dict(csv_file)

    # create a new WP site for each row
    print("\n{} websites will now be generated...".format(len(rows)))
    for index, row in enumerate(rows):

        print("\nIndex #{}:\n---".format(index))

        # the CSV file is utf-8, so encode the row correctly to avoid errors
        # in the logging.debug display
        row_bytes = repr(row).encode('utf-8')
        logging.debug("%s - row %s: %s", row["wp_site_url"], index, row_bytes)

        features_flags = 'features_flags' in row and row['features_flags'] == 'yes'

        try:
            export(
                site=row['Jahia_zip'],
                wp_site_url=row['wp_site_url'],
                # prefer unit_id over unit_name when it exists, because it doesn't change
                unit_name_or_id=row['unit_id'] if 'unit_id' in row else row['unit_name'],
                to_wordpress=True,
                clean_wordpress=False,
                output_dir=output_dir,
                theme=row['theme'],
                installs_locked=row['installs_locked'],
                updates_automatic=row['updates_automatic'],
                wp_env=row['openshift_env'],
                admin_password=admin_password,
                use_cache=use_cache,
                keep_extracted_files=keep_extracted_files,
                features_flags=features_flags,
                category=row['category'])
        except (Exception, subprocess.CalledProcessError) as e:
            logging.error(str(e))
            Tracer.write_row(site=row['Jahia_zip'], step=e, status="KO")
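# Utils.csv_filepath_to_dict is not shown in this section; assuming it behaves
# like csv.DictReader (one dict per row, keyed by the header line), an
# equivalent sketch would be:
import csv

def csv_filepath_to_dict(csv_file):
    with open(csv_file, newline='', encoding='utf-8') as fh:
        return list(csv.DictReader(fh))

rows = csv_filepath_to_dict('sites.csv')  # 'sites.csv' is a placeholder
for row in rows:
    print(row['wp_site_url'])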
def _execute(self, extension):
    # TODO: mock, do not execute tracing!
    # TODO: fix this
    import sys
    sys.argv = ['app', '-o', '/tmp/report_', 'ls']

    app = Tracer()
    app.register_extension(extension)
    app.main()
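# A sketch of what the TODO above could look like once the argv override is
# mocked instead of mutated globally, using the standard library's
# unittest.mock; this is a suggestion, not the project's actual test code.
import sys
from unittest import mock

def _execute_mocked(self, extension):
    # patch.object restores the real sys.argv when the block exits, so other
    # tests are not affected by the override
    with mock.patch.object(sys, 'argv', ['app', '-o', '/tmp/report_', 'ls']):
        app = Tracer()
        app.register_extension(extension)
        app.main()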
def export(site, wp_site_url, unit_name_or_id, to_wordpress=False,
           clean_wordpress=False, to_dictionary=False, admin_password=None,
           output_dir=None, theme=None, installs_locked=False,
           updates_automatic=False, openshift_env=None, use_cache=None,
           keep_extracted_files=False, features_flags=False, category=None,
           **kwargs):
    """
    Export the jahia content into a WordPress site.

    :param site: the name of the WordPress site
    :param wp_site_url: URL of WordPress site
    :param unit_name_or_id: unit name or unit ID of the WordPress site
    :param to_wordpress: to migrate data
    :param clean_wordpress: to clean data
    :param to_dictionary: to export the parsed data as a dictionary
    :param admin_password: an admin password
    :param output_dir: directory where the jahia zip file will be unzipped
    :param theme: WordPress theme used for the WordPress site
    :param installs_locked: boolean
    :param updates_automatic: boolean
    :param openshift_env: openshift environment (prod, int, gcharmier, ...)
    :param use_cache: if True, reuse a previously parsed site
    :param keep_extracted_files: if True, keep the files extracted from the jahia zip
    :param features_flags: whether to clean the page content
    :param category: site category, which defines the plugin list to install and configure
    """
    # download and unzip the jahia zip, then parse the xml data
    site = parse(site=site, use_cache=use_cache, output_dir=output_dir)

    # define the default language
    default_language = _get_default_language(site.languages)

    # the polylang plugin needs the default language in first position
    languages = _set_default_language_in_first_position(default_language, site.languages)

    if not site.acronym[default_language]:
        logging.warning("No wp site title in %s", default_language)
        wp_site_title = None
    else:
        wp_site_title = site.acronym[default_language]

    # theme
    if not site.theme[default_language] or site.theme[default_language] == "epfl":
        theme_faculty = ""
    else:
        theme_faculty = site.theme[default_language]

    if not theme:
        # set the correct theme depending on the parsing result
        theme = BANNER_THEME_NAME if default_language in site.banner else DEFAULT_THEME_NAME

    # if nothing is specified, use the default category
    if category is None:
        category = DEFAULT_WP_SITE_CATEGORY

    # tagline
    if not site.title[default_language]:
        logging.warning("No wp tagline in %s", default_language)
        wp_tagline = None
    else:
        wp_tagline = site.title

    if unit_name_or_id.isdigit():
        # we got a unit ID, fetch the unit name from LDAP
        unit_id = unit_name_or_id
        try:
            logging.info("Fetching LDAP for unit '%s' name...", unit_id)
            unit_name = get_unit_name(unit_id)
            logging.info("LDAP name found = %s...", unit_name)
        except LDAPSocketOpenError:
            logging.error("LDAP is not responding, aborting here...")
            raise
    else:
        # we got a unit name, fetch the unit ID from LDAP
        unit_name = unit_name_or_id
        try:
            logging.info("Fetching LDAP for unit '%s' ID...", unit_name)
            unit_id = get_unit_id(unit_name)
            logging.info("LDAP ID found = %s...", unit_id)
        except LDAPSocketOpenError:
            logging.error("LDAP is not responding, aborting here...")
            raise

    info = {
        # information from the parser
        'langs': ",".join(languages),
        'wp_site_title': wp_site_title,
        'wp_tagline': wp_tagline,
        'theme_faculty': theme_faculty,
        'unit_name': unit_name,

        # information from the source of truth
        'openshift_env': openshift_env,
        'wp_site_url': wp_site_url,
        'theme': theme,
        'updates_automatic': updates_automatic,
        'installs_locked': installs_locked,
        'category': category,

        # determined information
        'unit_id': unit_id,
        'from_export': True
    }

    # skip options, used only during development
    #
    # skip_base: if True, don't install WordPress, use the existing site
    # skip_media: if True, don't import the media
    # skip_pages: if True, don't import the pages
    skip_base = False
    skip_media = False
    skip_pages = False

    # List of plugins to leave in a 'deactivated' state during the import. To save
    # time, they are not activated during the generation of the empty WordPress site:
    # activating them takes time, and we would spend the same amount of time
    # deactivating them again before running the Jahia site import.
    # Deactivating plugins can improve import time by ~80%.
    # WARNING: be careful with the list order. Plugins are reactivated after the
    # import in list order, so if there are dependencies between plugins, arrange
    # them accordingly.
    deactivated_plugins = [
        'mainwp-child',
        'epfl-faq',
        'epfl-grid',
        'epfl-infoscience',
        'epfl-infoscience-search',
        'epfl-map',
        'epfl-memento',
        'epfl-news',
        'epfl-people',
        'epfl-scheduler',
        'EPFL-Content-Filter',
        'EPFL-Share',
        'epfl-snippet',
        'epfl-toggle',
        'epfl-tableau',
        'epfl-twitter',
        'epfl-xml',
        'epfl-video',
        'epfl-404',
        'epfl-stats',
        'epfl-google-forms',
        'feedzy-rss-feeds',
        'cache-control',
        'remote-content-shortcode',
        'shortcode-ui',
        'shortcode-ui-richtext',  # This one needs to come after the previous one
        'shortcodes-ultimate',
        'simple-sitemap',
        'svg-support',
        'enlighter',
        'pdfjs-viewer-shortcode',
        'tinymce-advanced',
        'varnish-http-purge'
    ]

    # generate a WordPress site
    wp_generator = WPGenerator(info, admin_password)

    # base installation
    if skip_base:
        logging.info("Deactivating %s plugins...", len(deactivated_plugins))
        for plugin_name in deactivated_plugins:
            # use a 'try' to handle plugins that may not be installed
            try:
                wp_generator.run_wp_cli("plugin deactivate {}".format(plugin_name))
            except Exception:
                logging.info("Plugin %s doesn't seem to be installed", plugin_name)
        try:
            # even if we skip the base installation, we need to reactivate
            # the basic auth plugin for the REST API
            wp_generator.run_wp_cli("plugin activate Basic-Auth")
        except Exception:
            # if activation fails, it means the plugin is not installed
            wp_generator.install_basic_auth_plugin()
    else:
        # if generate() returns False, it means there was an error
        if not wp_generator.generate(deactivated_plugins):
            # just display the line to add to the CSV
            _generate_csv_line(wp_generator)
            return
        wp_generator.install_basic_auth_plugin()

    # dual auth
    if settings.ACTIVE_DUAL_AUTH:
        wp_generator.active_dual_auth()

    # exporter
    wp_exporter = WPExporter(site, wp_generator, default_language, output_dir=output_dir)

    # clean
    if clean_wordpress:
        logging.info("Cleaning WordPress for %s...", site.name)
        wp_exporter.delete_all_content()
        logging.info("Data of WordPress site %s successfully deleted", site.name)

    # to WordPress
    if to_wordpress:
        logging.info("Exporting %s to WordPress...", site.name)
        try:
            if wp_generator.get_number_of_pages() == 0:
                wp_exporter.import_data_to_wordpress(
                    skip_pages=skip_pages,
                    skip_media=skip_media,
                    features_flags=features_flags)
                wp_exporter.write_redirections()
                _fix_menu_location(wp_generator, languages, default_language)

                logging.info("Reactivating %s plugins...", len(deactivated_plugins))
                for plugin_name in deactivated_plugins:
                    # use a 'try' to handle plugins that may not be installed
                    try:
                        wp_generator.run_wp_cli("plugin activate {}".format(plugin_name))
                    except Exception:
                        logging.info("Plugin %s doesn't seem to be installed", plugin_name)

                logging.info("Site %s successfully exported to WordPress", site.name)
            else:
                logging.info("Site %s already exported to WordPress", site.name)
        except (Exception, subprocess.CalledProcessError) as e:
            logging.error(str(e))
            Tracer.write_row(site=site.name, step=e, status="KO")
            if not settings.DEBUG:
                wp_generator.clean()
            raise e

        Tracer.write_row(site=site.name, step="export", status="OK")

    wp_generator.uninstall_basic_auth_plugin()
    wp_generator.enable_updates_automatic_if_allowed()

    # to dictionary
    if to_dictionary:
        data = DictExporter.generate_data(site)
        pprint(data, width=settings.LINE_LENGTH_ON_PPRINT)

    _generate_csv_line(wp_generator)

    if not keep_extracted_files:
        # Delete the extracted zip files. We take dirname because site.base_path
        # is the path to the subfolder in the zip, e.g. path_to_extract/dcsl/dcsl,
        # and we want to delete path_to_extract/dcsl.
        base_zip_path = os.path.dirname(os.path.abspath(site.base_path))
        logging.debug("Removing zip extracted folder '%s'", base_zip_path)
        if os.path.exists(base_zip_path):
            shutil.rmtree(base_zip_path)

    return wp_exporter
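# A hypothetical invocation of export(); every value below is illustrative,
# not a real site, URL or unit.
wp_exporter = export(
    site='dcsl',                                  # name of the Jahia zip
    wp_site_url='https://www.epfl.ch/labs/dcsl',  # target WordPress URL
    unit_name_or_id='13030',                      # a unit ID; a unit name also works
    to_wordpress=True,
    openshift_env='int',
    use_cache=True,
    keep_extracted_files=False)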
def main():
    Tracer().main()
def trace_model(self, dirnameOverride=None, numMemoryRegions: int = 3,
                modelName: str = 'model', foldBN: bool = True,
                outputLayerID: int = -1, custom_image_path=None) -> None:
    """
    Trace the model after pruning and quantization, and save the trace and parameters
    :return: None
    """
    dirname = self.config.checkpointSaveDir if dirnameOverride is None else dirnameOverride

    # prune and quantize the model
    self.eval_prep()

    # Deepcopy doesn't work, so do the following instead.
    # See https://discuss.pytorch.org/t/deep-copying-pytorch-modules/13514/2
    module = cifar_resnet56()
    module = self.quantize_model_method(module, self.qatConfig)
    module = self.prune_network_method(module, self.experimentStatus.targetSparsity, self.config)
    module.load_state_dict(self.model.state_dict())

    with torch.no_grad():
        # Hack
        # module.inputConvBNReLU._modules['0'].running_mean.zero_()
        # module.inputConvBNReLU._modules['0'].beta.zero_()
        # end of hack
        module.eval()
        trace = Tracer(module,
                       _foldBN=foldBN,
                       _defaultPruneCluster=self.config.pruneCluster,
                       _defaultPruneRangeInCluster=self.config.pruneRangeInCluster)

        # run inference and save a reference input-output pair
        blobPath = os.path.join(dirname, modelName + '_inout.yaml')
        blobDict: dict = {}
        output = None
        sampleIn = None
        if custom_image_path is None:
            for (data, target) in self.valDataLoader:
                sampleIn = data[0].unsqueeze(0)
                print(sampleIn.shape)
                output = trace.getOutput(sampleIn, outputLayerID)
                break
        else:
            print('Using custom image for inference tracing: {}'.format(custom_image_path))
            img = Image.open(custom_image_path)
            img = img.convert('RGB')
            # val_transform = transforms.Compose([
            #     transforms.Resize(256),
            #     transforms.CenterCrop(224),
            #     transforms.ToTensor(),
            #     transforms.Normalize(mean=[0.000, 0.000, 0.000],
            #                          std=[0.229, 0.224, 0.225])
            # ])
            sampleIn = self.val_transform(img)
            sampleIn = sampleIn.unsqueeze(0)
            print(sampleIn.shape)
            output = trace.getOutput(sampleIn, outputLayerID)

        blobDict['input'] = sampleIn.view(sampleIn.numel()).tolist()
        blobDict['output'] = output.view(output.numel()).tolist()

        # We want lists to be dumped in in-line format, hence the choice of default_flow_style.
        # See https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline
        with open(blobPath, 'w') as blobFile:
            yaml.dump(blobDict, blobFile, default_flow_style=None)

        trace.traceModel(sampleIn)
        trace.annotate(numMemRegions=numMemoryRegions)
        trace.dump(dirname, fileNameBase=modelName)
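# The inline-list trick used above is easy to verify in isolation; a minimal
# sketch assuming PyYAML (the linked question discusses ruamel.yaml as well):
import yaml

blob = {
    'input': [0.1, 0.2, 0.3],
    'output': [0.9, 0.05, 0.05],
}

# default_flow_style=None keeps the mapping in block style but dumps the leaf
# lists inline, e.g. "input: [0.1, 0.2, 0.3]"
print(yaml.dump(blob, default_flow_style=None))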
def trace(self, dirname: str, fileBaseName: str):
    if self.mode == 'test':
        dummyInput = torch.rand(size=[1, 3, 32, 32]) * (-1.0) + 4.0
    elif self.mode == 'add':
        # dummyInput = (torch.rand(size=[1, 4, 8, 8]) * (-1.0) + 4.0,
        #               torch.rand(size=[1, 4, 8, 8]) * (-1.0) + 4.0)
        dummyInput = (torch.rand(size=[1, 16, 8, 8]) * 4.0 - 2.0,
                      torch.rand(size=[1, 16, 8, 8]) * 2.0 - 1.0)
    elif self.mode == 'restest_cifar10':
        dummyInput = torch.rand(size=[1, 3, 32, 32]) * 4.0 - 2.0
    elif self.mode == 'restest_imagenet':
        dummyInput = torch.rand(size=[1, 3, 56, 56]) * 4.0 - 2.0
    elif self.mode == 'resnet50_conv12':
        dummyInput = torch.rand(size=[1, 256, 56, 56]) * 4.0 - 2.0
    elif self.mode == 'pointconv':
        dummyInput = torch.rand(size=[1, 4, 4, 4]) * (-1.0) + 4.0
    elif self.mode == 'resnet50_input_conv':
        dummyInput = torch.rand(size=[1, 3, 224, 224])
    elif self.mode == 'resnet50_input_conv_nobn':
        dummyInput = torch.rand(size=[1, 3, 224, 224])
    elif self.mode == 'resnet50_conv':
        input_range = math.pow(2.0, 2.0)
        dummyInput = torch.rand(size=[1, 64, 56, 56]) * 1.9 * input_range - input_range
    else:
        dummyInput = torch.rand(size=[1, 4, 8, 8]) * (-1.0) + 4.0

    # Run inference three times.
    # The first run calibrates the batch normalization parameters.
    self.model.train(mode=True)
    self.model.apply(torch.quantization.disable_observer)
    if isinstance(dummyInput, tuple):
        self.model(*dummyInput)
    else:
        self.model(dummyInput)

    # The second run calibrates the quantization observers.
    self.model.apply(torch.quantization.enable_observer)
    if isinstance(dummyInput, tuple):
        self.model(*dummyInput)
    else:
        self.model(dummyInput)

    # Finally, trace the model.
    self.model.apply(torch.quantization.disable_observer)
    self.model.eval()
    tracer = Tracer(self.model)
    # quantized_model = torch.quantization.convert(self.model.eval(), inplace=False)
    if isinstance(dummyInput, tuple):
        dummyOutput = tracer.getOutput(*dummyInput, layerID=-1)
    else:
        dummyOutput = tracer.getOutput(dummyInput, layerID=-1)

    print("Saving the dummy input and output")
    blobPath = os.path.join(dirname, fileBaseName + '_inout.yaml')
    blobDict: dict = {}

    # save inputs
    if isinstance(dummyInput, tuple):
        for idx, inputTensor in enumerate(dummyInput):
            blobDict['input_' + str(idx)] = inputTensor.view(inputTensor.numel()).tolist()
    else:
        blobDict['input'] = dummyInput.view(dummyInput.numel()).tolist()

    # save outputs
    blobDict['output'] = dummyOutput.view(dummyOutput.numel()).tolist()

    # We want lists to be dumped in in-line format, hence the choice of default_flow_style.
    # See https://stackoverflow.com/questions/56937691/making-yaml-ruamel-yaml-always-dump-lists-inline
    with open(blobPath, 'w') as blobFile:
        yaml.dump(blobDict, blobFile, default_flow_style=None)

    tracer.traceModel(dummyInput)
    tracer.annotate(numMemRegions=3)
    tracer.dump(dirname, fileBaseName)
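# The three forward passes above rely on switching module-wide state between
# runs. A self-contained sketch of the first step (BatchNorm calibration,
# then freezing for tracing), using only stock torch and a toy model:
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
dummy = torch.rand(1, 3, 32, 32) * 4.0 - 2.0  # same input-range trick as above

# pass 1: train mode lets BatchNorm update its running statistics
model.train()
with torch.no_grad():
    model(dummy)

# pass 2: eval mode freezes those statistics, so a subsequent trace sees
# fixed parameters instead of batch-dependent ones
model.eval()
with torch.no_grad():
    out = model(dummy)
print(out.shape)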
#!/usr/bin/env python3
from tracer.tracer import Tracer

Tracer().main()
def unzip_one(output_dir, site_name, zip_file):
    """
    Unzip a jahia zip file.

    A jahia zip file contains many files, including a zip file named
    <site-name>.zip. This inner file is unzipped too, and the path of the
    unzipped root directory is returned.

    :param output_dir: directory where the jahia zip file will be unzipped
    :param site_name: WordPress site name
    :param zip_file: name of the jahia zip file
    """
    # create a subdir in output_dir
    output_subdir = os.path.join(output_dir, site_name)
    if not os.path.isdir(output_subdir):
        os.mkdir(output_subdir)

    # check if the unzipped files already exist
    unzip_path = os.path.join(output_subdir, site_name)
    if os.path.isdir(unzip_path):
        logging.info("Already unzipped %s", unzip_path)
        Tracer.write_row(site=site_name, step="unzip", status="OK")
        return unzip_path
    os.makedirs(unzip_path)

    logging.info("Unzipping %s...", zip_file)

    # make sure we have an input file
    if not zip_file or not os.path.isfile(zip_file):
        logging.error("%s - unzip - Jahia zip file %s not found", site_name, zip_file)
        raise ValueError("Jahia zip file not found")

    # open the zip to manipulate / extract its content
    with zipfile.ZipFile(zip_file, 'r') as export_zip:
        # make sure we have the zip containing the site
        zip_name = "{}.zip".format(site_name)
        if zip_name not in export_zip.namelist():
            logging.error("%s - unzip - zip file %s not found in main zip", site_name, zip_name)
            raise ValueError("Jahia zip file does not contain site file")

        # extract the export zip file
        export_zip.extractall(output_subdir)

    # unzip the inner zip with the site files
    zip_path = os.path.join(output_subdir, zip_name)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref_with_files:
        for zipped_file in zip_ref_with_files.infolist():
            # extract the zipped data into memory
            data = zip_ref_with_files.read(zipped_file)

            # convert the unicode file path back to cp437, the encoding used
            # by Jahia for the filenames in the zip files
            disk_file_name = zipped_file.filename.encode('cp437')
            dir_name = os.path.dirname(disk_file_name)

            # if the file is in a subfolder, create the subfolder if it does not exist
            if dir_name:
                os.makedirs(os.path.join(unzip_path.encode('cp437'), dir_name), exist_ok=True)

            if os.path.basename(disk_file_name):
                with open(os.path.join(unzip_path.encode('cp437'), disk_file_name), 'wb') as fd:
                    fd.write(data)

    logging.info("Site successfully extracted in %s", unzip_path)
    Tracer.write_row(site=site_name, step="unzip", status="OK")

    return unzip_path
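# Python 3's zipfile decodes non-UTF-8 entry names as cp437, so re-encoding
# the decoded name recovers the raw filename bytes that Jahia stored. A
# minimal sketch with a placeholder archive name:
import zipfile

with zipfile.ZipFile('export.zip', 'r') as zf:  # 'export.zip' is a placeholder
    for info in zf.infolist():
        # encoding the already-decoded name back to cp437 yields the original
        # bytes, which can be written to disk verbatim
        raw_name = info.filename.encode('cp437')
        print(raw_name)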
def _drill_input(self):
    '''
    Symbolically step down a path with a tracer, trying to concretize inputs
    for unencountered state transitions.
    '''

    # initialize the tracer
    t = Tracer(self.binary, self.input, max_size=50, hooks=self._hooks)
    self._set_concretizations(t)
    self._set_simproc_limits(t)

    # update encounters with known state transitions
    self._encounters.update(izip(t.trace, islice(t.trace, 1, None)))

    l.debug("drilling into %r", self.input)
    l.debug("input is %r", self.input)

    # used for finding the right index in the fuzz_bitmap
    prev_loc = 0

    branches = t.next_branch()
    while len(branches.active) > 0 and t.bb_cnt < len(t.trace):

        # check here to see if a crash has been found
        if self.redis and self.redis.sismember(self.identifier + "-finished", True):
            return

        # mimic AFL's indexing scheme
        if len(branches.missed) > 0:
            prev_addr = branches.missed[0].addr_trace[-1]  # a bit ugly
            prev_loc = prev_addr
            prev_loc = (prev_loc >> 4) ^ (prev_loc << 8)
            prev_loc &= self.fuzz_bitmap_size - 1
            prev_loc = prev_loc >> 1

            for path in branches.missed:
                cur_loc = path.addr
                cur_loc = (cur_loc >> 4) ^ (cur_loc << 8)
                cur_loc &= self.fuzz_bitmap_size - 1

                hit = bool(ord(self.fuzz_bitmap[cur_loc ^ prev_loc]) ^ 0xff)

                transition = (prev_addr, path.addr)

                l.debug("found %x -> %x transition", transition[0], transition[1])

                if not hit and not self._has_encountered(transition) and not self._has_false(path):
                    t.remove_preconstraints(path)

                    if path.state.satisfiable():
                        # a completely new state transition, let's try to accelerate AFL
                        # by finding a number of deeper inputs
                        l.info("found a completely new transition, exploring to some extent")
                        w = self._writeout(prev_addr, path)
                        if w is not None:
                            yield w
                        for i in self._symbolic_explorer_stub(path):
                            yield i
                    else:
                        l.debug("path to %#x was not satisfiable", transition[1])
                else:
                    l.debug("%x -> %x has already been encountered", transition[0], transition[1])

        try:
            branches = t.next_branch()
        except IndexError:
            branches.active = []
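# The bit-twiddling above mirrors AFL's edge-indexing scheme. A standalone
# sketch of the same arithmetic, with a hypothetical bitmap size:
FUZZ_BITMAP_SIZE = 1 << 16  # hypothetical; matches AFL's default map size

def bitmap_loc(addr, size=FUZZ_BITMAP_SIZE):
    # same mixing as above: fold a basic-block address into the bitmap range
    return ((addr >> 4) ^ (addr << 8)) & (size - 1)

def edge_index(prev_addr, cur_addr):
    # the previous location is shifted right by one so that the A->B edge
    # lands in a different bitmap byte than B->A
    return bitmap_loc(cur_addr) ^ (bitmap_loc(prev_addr) >> 1)

print(hex(edge_index(0x400a10, 0x400b3c)))  # illustrative addresses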