Python cleanup_text Exemples, utils.cleanup_text Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : module.py Projet : z00nx/emailtrail

def extract_timestring(header):
    """
    Tries to extract a timestring from a header.

    The candidate is searched for in this order of preference:
      1. the text after the last `;` in the header,
      2. the last line of a multi-line header,
      3. the text after an ` id abc.xyz ` token.

    Args:
        header: the header text, as a `str`.

    Returns:
        None when no candidate is found, otherwise a String that *could*
        be a valid timestring (it is cleaned up here, not parsed).

    Raises:
        TypeError: if `header` is not a `str`.
    """
    if not isinstance(header, str):
        raise TypeError('header must be a str')

    header = cleanup_text(header)

    by_semicolon = header.split(';')
    by_newline = header.split('\n')
    # Raw string: `\s` inside a plain literal is an invalid escape sequence.
    by_id = re.split(r'\s+id\s+[^\s]*\s+', header)

    # str.split always yields at least one element, so "no semicolon" is
    # simply the fall-through case of the first test.
    if len(by_semicolon) > 1:
        timestring = by_semicolon[-1]
    elif len(by_newline) > 1:
        # find it on the last line
        timestring = by_newline[-1]
    elif len(by_id) > 1:
        # find it after ` id abc.xyz `
        timestring = by_id[-1]
    else:
        return None

    timestring = cleanup_text(timestring)
    timestring = cleanup_text(remove_details(timestring))
    timestring = strip_timezone_name(timestring)
    # Rewrite the `-0000` offset as `+0000`.
    timestring = re.sub('-0000', '+0000', timestring)

    return timestring

Exemple #2

0

Afficher le fichier

Fichier : module.py Projet : z00nx/emailtrail

def extract_protocol(header):
    """
    Get the protocol used. e.g. SMTP, HTTP etc.

    The header is flattened to a single line, passed through
    `remove_details` and `cleanup_text`, and then matched against a
    `from ... by ... with|via <protocol>` or `by ... with|via <protocol>`
    shape depending on how it starts.

    Args:
        header: the header text.

    Returns:
        The text captured after `with`/`via`, passed through
        `cleanup_text`; `cleanup_text('')` when the header starts with
        neither `from` nor `by`, or when nothing matches.
    """
    header = re.sub('\n', ' ', header)
    header = remove_details(header)
    header = cleanup_text(header)

    # Shared tail of both patterns (verbose mode, so whitespace is ignored).
    # Raw strings: `\s` inside a plain literal is an invalid escape sequence.
    protocol_tail = r"""
        (?:
            (?:with|via)
            (.*?)
            (?:id|$|;)
            |id|$
        )
        """

    # A header cannot start with both prefixes, so the branches are exclusive.
    if header.startswith('from'):
        pattern = r"from\s+(?:.*?)\s+by\s+(?:.*?)\s+" + protocol_tail
    elif header.startswith('by'):
        pattern = r"by\s+(?:.*?)\s+" + protocol_tail
    else:
        pattern = None

    protocol = ''
    if pattern is not None:
        match = re.findall(pattern, header, re.DOTALL | re.X)
        protocol = match[0] if match else ''

    return cleanup_text(protocol)

Exemple #3

0

Afficher le fichier

Fichier : services.py Projet : Keda87/scrap-you-babe

 def scrap_list_qna(content):
     """
     Yield one record per answer entry found in `content`.

     Looks up the `div.item-page` container, takes at most the first five
     `article.item.streamItem.streamItem-answer` entries inside it and, for
     each one, yields an OrderedDict with the keys `question`, `answer`
     (passed through `cleanup_text`) and `time_info`.
     """
     page = content.find('div.item-page', first=True)
     entries = page.find('article.item.streamItem.streamItem-answer')
     for entry in entries[:5]:
         record = OrderedDict(
             question=entry.find('header > h2', first=True).text,
             answer=cleanup_text(
                 entry.find('div.streamItem_content', first=True).text),
             time_info=entry.find('div.streamItem_details', first=True).text,
         )
         yield record

Exemple #4

0

Afficher le fichier

Fichier : module.py Projet : z00nx/emailtrail

def extract_received_by_label(header):
    """
    Get the hostname associated with `by`.

    The header is flattened to a single line, passed through
    `remove_details` and `cleanup_text`, and the token following `by` is
    captured.

    Args:
        header: the header text.

    Returns:
        The captured token, or '' when the header starts with neither
        `from` nor `by`, or when nothing matches.
    """
    header = re.sub('\n', ' ', header)
    header = remove_details(header)
    header = cleanup_text(header)

    # Raw strings: `\s` inside a plain literal is an invalid escape sequence.
    if header.startswith('from'):
        pattern = r'from\s+(?:.*?)\s+by\s+(.*?)(?:\s+|$)'
    elif header.startswith('by'):
        pattern = r'by\s+(.*?)(?:\s+|$)'
    else:
        return ''

    match = re.findall(pattern, header)
    return match[0] if match else ''

Exemple #5

0

Afficher le fichier

Fichier : module.py Projet : z00nx/emailtrail

def generate_trail(received):
    """
    Build the email trail from a list of `received` headers.

    Every header is passed through `cleanup_text`, then each cleaned header
    is turned into a hop with `analyse_hop`. The hops are reversed and handed
    to `set_delay_information`, whose result is returned.

    Returns None when `received` is None.
    """
    if received is None:
        return None

    cleaned_headers = list(map(cleanup_text, received))
    hops = list(map(analyse_hop, cleaned_headers))

    # reverse so the hops end up in chronological order
    hops.reverse()
    return set_delay_information(hops)

Exemple #6

0

Afficher le fichier

Fichier : __init__.py Projet : hirleydayan/BackupFabric

def run_cmd(cmd, work_dir="/tmp", logger=None):
    """Run remote command.

    The command is executed with `run` from inside `work_dir`, with output
    hidden and `warn_only` enabled. The command itself and every non-empty
    line of its output (passed through `cleanup_text`) are logged at INFO
    level.

    Args:
        cmd: the command to run.
        work_dir: directory to `cd` into before running the command.
        logger: logger to write to; defaults to the "fabric" logger when
            not supplied.

    Returns:
        Whatever `run(cmd)` returned, or None if an exception was raised
        (the exception is logged, not propagated).
    """
    # Only fall back to the default logger when the caller gave none;
    # previously the argument was always overwritten.
    if logger is None:
        logger = logging.getLogger("fabric")
    r = None
    with settings(abort_exception=Exception), cd(work_dir), \
        hide('output', 'running', 'warnings'), \
            settings(warn_only=True):
        try:
            logger.info(cmd)
            r = run(cmd)
            for line in r.splitlines():
                # Skip empty lines
                if not line:
                    continue
                logger.info(cleanup_text(line))
        except Exception as e:
            # Best effort: report the failure and return what we have.
            logger.error(e)
    return r

Exemple #7

0

Afficher le fichier

                maxIoU = iou
                boxGT = boxT
                labelGT = label
                idxGT = idx

        if use_tesseract:
            text = pytesseract.image_to_string(img_cropped,
                                               lang="deu+frk",
                                               config=custom_oem_psm_config)
        else:
            result = reader.recognize(img_cropped)
            _, text, prob = result[0]

        resultsText += f'Detected Box ({min_x,min_y,max_x,max_y}) '

        text = utils.cleanup_text(text)
        box_type = "FN"
        if maxIoU < 0.5:
            false_positives += 1
            fps += 1
            resultsText += "False positive\n"
            box_type = "FP"
        else:
            true_positives += 1
            tps += 1
            classifications_per_box[idxGT] += 1
            textGT = utils.cleanup_text(labelGT)
            box_type = "TP"
            resultsText += "True positive True text: " + labelGT + " Detected text: " + text
            sum_similarity += similar(text, textGT)

Exemple #8

0

Afficher le fichier

Fichier : bf.py Projet : hirleydayan/BackupFabric

def xen_backup(f=""):
    """Run xen backup on XenServers defined in config file.

    Steps, in order:
      1. set up a rotating logger tagged with the current host,
      2. build the list of VMs to back up from `f`,
      3. create the mount point and mount the backup NFS share,
      4. for each VM: snapshot it if it is running, then export it (or its
         snapshot) to the share and uninstall the snapshot afterwards,
      5. unmount the share and delete the mount point.

    A failure in steps 2-3 aborts the whole backup; a failure for a single
    VM is logged and that VM is skipped.

    Args:
        f: passed through to `__make_list_of_vms` to select the VMs.

    Returns:
        None in all cases; progress and errors are reported via the logger.
    """
    client_ip = str(env.host_string)
    log_path, _ = os.path.split(__LOG)
    ul.make_dir(log_path)

    logger = utils.init_logger_with_rotate(__LOG, client_ip.ljust(15))

    logger.info("XenServer backup service started.")
    logger.info("Version: " + __VERSION)

    # Print XenServer hostname
    fl.hostname()

    # Make a list of VMs to backup
    vms = __make_list_of_vms(f, logger)

    if vms is None:
        logger.error("Aborting backup on XenServer.")
        return

    # Make mount point path for NFS
    if not __make_folder(__XEN_BKP_PATH, logger):
        logger.error("Aborting backup on XenServer.")
        return

    # Mount backup NFS
    __mount_folder(ul.get_ip() + ":" + __SERVER_BKP_PATH, __XEN_BKP_PATH,
                   logger)

    # Check if NFS has been mounted
    if not ___check_mountpoint(__XEN_BKP_PATH, logger):
        logger.error("Aborting backup on XenServer.")
        return

    # Make a snapshot of the VMs
    for vm_uuid in vms:
        # Export the VM itself unless a snapshot replaces it below.
        vm_uuid_bk = vm_uuid
        vm_name = str(vms[vm_uuid][fx.VALUE_NAMELABEL])
        vm_power_state = str(vms[vm_uuid][fx.VALUE_POWERSTATE])
        if not vm_name:
            logger.error("Could not get the name of a candidate " +
                         "virtual machine to be backed-up.")
            logger.error("Skipping virtual machine")
            continue

        logger.info("Virtual machine is " + vm_power_state.lower())

        if vm_power_state == fx.VALUE_RUNNING:
            logger.info("Taking snapshot from virtual machine on XenServer.")
            vm_uuid_bk = fx.vm_snapshot(
                vm_uuid, "\"" + vm_name + " " + __DATE_TIME + " backup\"")

            if vm_uuid_bk is None or vm_uuid_bk.return_code != 0:
                logger.error("Could not create a snapshot of the running " +
                             "virtual machine on XenServer")
                logger.error("Skipping virtual machine")
                continue

            resp = fx.template_param_set(vm_uuid_bk)
            if resp is None or resp.return_code != 0:
                logger.error("Could not adjust a snapshot parameter of " +
                             "the running virtual machine on XenServer")
                logger.error("Skipping virtual machine")
                continue

        # Creating path for saving backup on remote mounted backup server
        # if it does not exist. The result is not checked here; a failure
        # surfaces when the export below fails.
        fl.mkdir(__XEN_BKP_PATH + "/" + client_ip)

        file_name = __XEN_BKP_PATH + "/" + client_ip + "/" + \
            utils.cleanup_text(vm_name, True) + "_" + __DATE_TIME

        logger.info("Exporting virtual machine on XenServer to NFS")
        resp = fx.vm_export(vm_uuid_bk, file_name)

        if resp is None or resp.return_code != 0:
            logger.error("Could not export virtual machine on XenServer")
            logger.error("Virtual machine backup aborted")
            continue

        # Only running VMs were snapshotted, so only they need cleaning up.
        if vm_power_state == fx.VALUE_RUNNING:
            resp = fx.vm_uninstall(vm_uuid_bk, force="true")
            if resp is None or resp.return_code != 0:
                logger.warning("Could not uninstall virtual machine " +
                               "snapshot on XenServer")
                logger.warning("Server needs to be cleaned up manually")

    # Unmount backup NFS
    logger.info("Unmounting NFS from XenServer")
    if not __unmount_folder(__XEN_BKP_PATH, logger):
        return

    # Delete mount point
    logger.info("Deleting temporary mount point path from XenServer")
    if not __delete_folder(__XEN_BKP_PATH, logger):
        return

    logger.info("Backup finished for XenServer at " + client_ip)