def extract_timestring(header):
    """
    Tries to extract a timestring from a header

    The candidate substring is chosen by the first rule that applies:

    1. the text after the last ``;`` when the header contains one,
    2. otherwise the last line when the header spans several lines,
    3. otherwise the text after an `` id <token> `` clause.

    The candidate is then passed through the `cleanup_text`,
    `remove_details` and `strip_timezone_name` helpers, and any ``-0000``
    offset is rewritten as ``+0000``.

    :param header: header text to inspect
    :returns: None when no rule yields a candidate, otherwise a String
        that *could* be a valid timestring (it is not validated here)
    :raises TypeError: if `header` is not a string
    """
    if not isinstance(header, str):
        raise TypeError(
            'header must be a str, got %s' % type(header).__name__)

    header = cleanup_text(header)

    timestring = None
    split_by_semicolon = header.split(';')
    if len(split_by_semicolon) > 1:
        timestring = split_by_semicolon[-1]
    else:
        split_by_newline = header.split('\n')
        # Raw string: `\s` must reach the regex engine untouched.
        split_by_id = re.split(r'\s+id\s+[^\s]*\s+', header)
        if len(split_by_newline) > 1:
            # find it on the last line
            timestring = split_by_newline[-1]
        elif len(split_by_id) > 1:
            # find it after ` id abc.xyz `
            timestring = split_by_id[-1]

    if timestring is None:
        return None

    timestring = cleanup_text(timestring)
    timestring = cleanup_text(remove_details(timestring))
    timestring = strip_timezone_name(timestring)
    # Plain substring replacement; no regex is needed for a literal.
    return timestring.replace('-0000', '+0000')
def extract_protocol(header):
    """
    Get the protocol used. e.g. SMTP, HTTP etc.

    Newlines in the header are replaced by spaces and the header is passed
    through `remove_details` and `cleanup_text` before matching. Only
    headers that start with ``from`` or ``by`` are inspected; the protocol
    is the text captured after a ``with``/``via`` keyword, up to the next
    ``id``, ``;`` or the end of the header.

    :param header: header text to inspect
    :returns: the captured protocol passed through `cleanup_text`; the
        captured text is the empty string when nothing matched
    """
    header = re.sub('\n', ' ', header)
    header = remove_details(header)
    header = cleanup_text(header)

    # Patterns are raw strings so `\s` reaches the regex engine untouched.
    # They are compiled with re.X, so the layout whitespace is ignored.
    pattern = None
    if header.startswith('from'):
        pattern = r"""
            from\s+(?:.*?)\s+by\s+(?:.*?)\s+
            (?:
                (?:with|via) (.*?) (?:id|$|;)
                |id|$
            )
        """
    elif header.startswith('by'):
        pattern = r"""
            by\s+(?:.*?)\s+
            (?:
                (?:with|via) (.*?) (?:id|$|;)
                |id|$
            )
        """

    protocol = ''
    if pattern is not None:
        match = re.findall(pattern, header, re.DOTALL | re.X)
        # findall yields '' for a match where the group did not participate
        protocol = match[0] if match else ''

    return cleanup_text(protocol)
def scrap_list_qna(content):
    """
    Yield question/answer records scraped from a parsed page.

    Looks up the first ``div.item-page`` element in `content` and walks
    at most the first five ``article.item.streamItem.streamItem-answer``
    elements found inside it.

    :param content: object exposing ``find(selector, first=...)``
    :returns: generator of OrderedDicts with the keys ``question``,
        ``answer`` (passed through `cleanup_text`) and ``time_info``
    """
    page = content.find('div.item-page', first=True)
    answered = page.find('article.item.streamItem.streamItem-answer')

    for entry in answered[:5]:
        record = OrderedDict()
        record['question'] = entry.find('header > h2', first=True).text
        raw_answer = entry.find('div.streamItem_content', first=True).text
        record['answer'] = cleanup_text(raw_answer)
        record['time_info'] = entry.find(
            'div.streamItem_details', first=True).text
        yield record
def extract_received_by_label(header):
    """
    Get the hostname associated with `by`

    Newlines in the header are replaced by spaces and the header is passed
    through `remove_details` and `cleanup_text` before matching. Only
    headers that start with ``from`` or ``by`` are inspected.

    :param header: header text to inspect
    :returns: the first whitespace-delimited token following ``by``, or
        the empty string when the header has another shape or no match
    """
    header = re.sub('\n', ' ', header)
    header = remove_details(header)
    header = cleanup_text(header)

    # Raw strings keep `\s` intact for the regex engine.
    if header.startswith('from'):
        pattern = r'from\s+(?:.*?)\s+by\s+(.*?)(?:\s+|$)'
    elif header.startswith('by'):
        pattern = r'by\s+(.*?)(?:\s+|$)'
    else:
        return ''

    match = re.findall(pattern, header)
    return match[0] if match else ''
def generate_trail(received):
    """
    Build the email trail (structured information of hops in transit)
    from a list of `received` headers.

    Every header is passed through `cleanup_text`, then through
    `analyse_hop`; the resulting hops are put in reverse input order and
    handed to `set_delay_information`.

    :param received: iterable of header strings, or None
    :returns: None when `received` is None, otherwise whatever
        `set_delay_information` returns for the reversed hop list
    """
    if received is None:
        return None

    cleaned = [cleanup_text(raw) for raw in received]
    hops = [analyse_hop(entry) for entry in cleaned]

    # reversed input order == chronological order
    chronological = list(reversed(hops))
    return set_delay_information(chronological)
def run_cmd(cmd, work_dir="/tmp", logger=None):
    """Run remote command.

    The command is executed through ``run`` inside ``cd(work_dir)``, with
    'output', 'running' and 'warnings' hidden and ``warn_only`` enabled.
    The command itself and every non-empty line of its output (passed
    through `cleanup_text`) are logged at INFO level. Any exception is
    logged at ERROR level and swallowed.

    :param cmd: command line to execute
    :param work_dir: directory to change into before running the command
    :param logger: logger to report to; the "fabric" logger is used when
        none is given
    :returns: the value returned by ``run``, or None if an exception was
        raised before it returned
    """
    # Only fall back to the shared logger when the caller supplied none,
    # so the `logger` argument is actually honoured.
    logger = logger or logging.getLogger("fabric")

    result = None
    with settings(abort_exception=Exception), cd(work_dir), \
            hide('output', 'running', 'warnings'), \
            settings(warn_only=True):
        try:
            logger.info(cmd)
            result = run(cmd)
            for line in result.splitlines():
                # Skip empty lines
                if not line:
                    continue
                logger.info(cleanup_text(line))
        except Exception as e:
            # Best effort: report the failure and return what we have.
            logger.error(e)
    return result
maxIoU = iou boxGT = boxT labelGT = label idxGT = idx if use_tesseract: text = pytesseract.image_to_string(img_cropped, lang="deu+frk", config=custom_oem_psm_config) else: result = reader.recognize(img_cropped) _, text, prob = result[0] resultsText += f'Detected Box ({min_x,min_y,max_x,max_y}) ' text = utils.cleanup_text(text) box_type = "FN" if maxIoU < 0.5: false_positives += 1 fps += 1 resultsText += "False positive\n" box_type = "FP" else: true_positives += 1 tps += 1 classifications_per_box[idxGT] += 1 textGT = utils.cleanup_text(labelGT) box_type = "TP" resultsText += "True positive True text: " + labelGT + " Detected text: " + text sum_similarity += similar(text, textGT)
def xen_backup(f=""):
    """Run xen backup on XenServers defined in config file.

    Steps, in order:

    1. set up a rotating logger labelled with the current host,
    2. build the list of virtual machines to back up,
    3. create the mount point and mount the backup NFS share on it,
    4. for every virtual machine: snapshot it when it is running, export
       it (or its snapshot) to the NFS share, then uninstall the snapshot,
    5. unmount the share and delete the mount point.

    A failure in steps 2-3 aborts the whole run; a failure for a single
    virtual machine only skips that machine.

    :param f: forwarded unchanged to `__make_list_of_vms` (presumably a
        selection of virtual machines -- confirm against that helper)
    :returns: None
    """
    client_ip = str(env.host_string)

    log_path, _ = os.path.split(__LOG)
    ul.make_dir(log_path)
    logger = utils.init_logger_with_rotate(__LOG, client_ip.ljust(15))

    logger.info("XenServer backup service started.")
    logger.info("Version: " + __VERSION)

    # Print XenServer hostname
    fl.hostname()

    # Make a list of VMs to backup
    vms = __make_list_of_vms(f, logger)
    if vms is None:
        logger.error("Aborting backup on XenServer.")
        return

    # Make mount point path for NFS
    if not __make_folder(__XEN_BKP_PATH, logger):
        logger.error("Aborting backup on XenServer.")
        return

    # Mount backup NFS
    __mount_folder(ul.get_ip() + ":" + __SERVER_BKP_PATH,
                   __XEN_BKP_PATH, logger)

    # Check if NFS has been mounted
    if not ___check_mountpoint(__XEN_BKP_PATH, logger):
        logger.error("Aborting backup on XenServer.")
        return

    # Make a snapshot of the VMs
    for vm_uuid in vms:
        # Exported as-is unless the VM is running, in which case the
        # snapshot replaces it below.
        vm_uuid_bk = vm_uuid
        vm_name = str(vms[vm_uuid][fx.VALUE_NAMELABEL])
        vm_power_state = str(vms[vm_uuid][fx.VALUE_POWERSTATE])

        if not vm_name:
            logger.error("Could not get the name of a candidate " +
                         "virtual machine to be backed-up.")
            logger.error("Skipping virtual machine")
            continue

        logger.info("Virtual machine is " + vm_power_state.lower())
        if vm_power_state == fx.VALUE_RUNNING:
            logger.info("Taking snapshot from virtual machine on XenServer.")
            vm_uuid_bk = fx.vm_snapshot(
                vm_uuid, "\"" + vm_name + " " + __DATE_TIME + " backup\"")
            if vm_uuid_bk is None or vm_uuid_bk.return_code != 0:
                logger.error("Could not create a snapshot of the running " +
                             "virtual machine on XenServer")
                logger.error("Skipping virtual machine")
                continue

            resp = fx.template_param_set(vm_uuid_bk)
            if resp is None or resp.return_code != 0:
                # NOTE(review): the snapshot taken above is left behind on
                # this path; confirm whether it should be uninstalled.
                logger.error("Could not adjust a snapshot parameter of " +
                             "the running virtual machine on XenServer")
                logger.error("Skipping virtual machine")
                continue

        # Creating path for saving backup on remote mounted backup server
        # if it does not exist
        resp = fl.mkdir(__XEN_BKP_PATH + "/" + client_ip)
        file_name = __XEN_BKP_PATH + "/" + client_ip + "/" + \
            utils.cleanup_text(vm_name, True) + "_" + __DATE_TIME

        logger.info("Exporting virtual machine on XenServer to NFS")
        resp = fx.vm_export(vm_uuid_bk, file_name)
        if resp is None or resp.return_code != 0:
            logger.error("Could not export virtual machine on XenServer")
            logger.error("Virtual machine backup aborted")
            continue

        # Only running VMs were snapshotted, so only they need cleanup.
        if vm_power_state == fx.VALUE_RUNNING:
            resp = fx.vm_uninstall(vm_uuid_bk, force="true")
            if resp is None or resp.return_code != 0:
                logger.warning("Could not uninstall virtual machine " +
                               "snapshot on XenServer")
                logger.warning("Server needs to be cleaned up manually")

    # Unmount backup NFS
    logger.info("Unmounting NFS from XenServer")
    if not __unmount_folder(__XEN_BKP_PATH, logger):
        return

    # Delete mount point
    logger.info("Deleting temporary mount point path from XenServer")
    if not __delete_folder(__XEN_BKP_PATH, logger):
        return

    logger.info("Backup finished for XenServer at " + client_ip)