Esempio n. 1
0
def html_similarity(html_arg: html_arg_dict) -> BadResult:
    """
    Compare the prettified NG and legacy HTML documents.

    Both documents are rendered with ``prettify()`` and scored with
    ``lev_similarity``. A score below 0.69 yields a BadResult tagged
    "html_similarity"; otherwise None is returned.
    """
    ng_pretty = html_arg['ng_html'].prettify()
    legacy_pretty = html_arg['legacy_html'].prettify()
    score = lev_similarity(ng_pretty, legacy_pretty)

    if score < 0.69:
        paper_id = html_arg['id']
        detail = f"html_pretty_sim for {paper_id} = {score}"
        return BadResult(paper_id, "html_similarity", detail)
    return None
Esempio n. 2
0
def text_similarity(text_arg: text_arg_dict) -> BadResult:
    """
    Compare the NG and legacy text using Levenshtein similarity.

    Returns a BadResult tagged 'text_similarity' when the score from
    ``lev_similarity`` is below 0.74; otherwise returns None.
    """
    ng_text, legacy_text = text_arg['ng_text'], text_arg['legacy_text']
    score = lev_similarity(ng_text, legacy_text)

    if score < 0.74:
        paper_id = text_arg['paper_id']
        return BadResult(paper_id, 'text_similarity',
                         f"text_sim for {paper_id}  = {score}")
    return None
Esempio n. 3
0
def compare_status(res_arg: res_arg_dict) -> BadResult:
    """
    Check that both the NG and the legacy response have HTTP status 200.

    Returns None when both status codes are 200. Otherwise returns a
    BadResult tagged 'compare_status' whose message lists each URL together
    with the status code it produced.
    """
    both_ok = (res_arg['ng_res'].status_code == 200
               and res_arg['legacy_res'].status_code == 200)
    if both_ok:
        return None

    ng_part = f'HTTP status for {res_arg["ng_url"]} was {res_arg["ng_res"].status_code} '
    legacy_part = f'and for {res_arg["legacy_url"]} was {res_arg["legacy_res"].status_code}'
    return BadResult(res_arg['paper_id'], 'compare_status',
                     ng_part + legacy_part)
Esempio n. 4
0
def metadata_fields_similarity(html_arg: html_arg_dict) -> BadResult:
    """
    Determine similarity of metadata fields.

    Each page should contain a div.metatable. The labels (td.label cells)
    found in its rows must be the same on both the NG and legacy pages.

    Returns None when both pages expose the same set of labels. Returns a
    BadResult when the label sets differ, or when div.metatable is missing
    from either page.
    """
    check_name = 'metadata_fields_similarity'

    def labels_of(page):
        """Return the set of label texts in the page's metatable, or None
        when the page has no div.metatable."""
        metatable = page.find('div', 'metatable')
        if metatable is None:
            return None
        labels = set()
        for tr in metatable.find_all('tr'):
            cell = tr.find('td', 'label')
            # Rows without a label cell carry no field name; skip them
            # instead of failing on None.
            if cell is not None:
                # Compare label text; `.contents` is a list and therefore
                # cannot be stored in a set.
                labels.add(cell.get_text(strip=True))
        return labels

    ng_labels = labels_of(html_arg['ng_html'])
    legacy_labels = labels_of(html_arg['legacy_html'])

    missing_from = [
        side
        for side, labels in (('NG', ng_labels), ('legacy', legacy_labels))
        if labels is None
    ]
    if missing_from:
        return BadResult(
            html_arg['id'], check_name,
            f"Missing div.metatable for {html_arg['id']} "
            f"from {' and '.join(missing_from)}")

    if ng_labels == legacy_labels:
        return None

    return BadResult(
        html_arg['id'], check_name,
        "Metadata field included on NG do not match those from legacy " +
        f"NG: {ng_labels} Legacy: {legacy_labels}")
Esempio n. 5
0
 def protected(res_args: Dict) -> BadResult:
     """
     Call ``fn`` with res_args, turning any exception into a BadResult.

     ``fn`` is not defined in this block; it is taken from the enclosing
     scope. On failure the BadResult carries the formatted traceback and the
     fixed name "name unknown".
     """
     # noinspection PyBroadException
     try:
         outcome = fn(res_args)
     except Exception:
         return BadResult(res_args['id'], "name unknown",
                          traceback.format_exc())
     else:
         return outcome
Esempio n. 6
0
def _element_similarity(name: str, get_element: Callable[[BeautifulSoup],
                                                         BeautifulSoup],
                        min_sim: float, required: bool, check_counts: bool,
                        text_trans: Callable[[str], str],
                        html_arg: html_arg_dict) -> BadResult:
    """
    Perform element similarity.

    Uses get_element to select elements of the BS doc on both NG and Legacy,
    then compares the first element found on each side.

    name: label of the check, stored in every BadResult returned.
    get_element: selector applied to html_arg['legacy_html'] and
    html_arg['ng_html']; its result must support len() and indexing.
    min_sim: minimum lev_similarity score for the comparison to pass.
    required: element must be in both NG and Legacy.
    check_counts: counts of elements must be the same in both NG and Legacy,
    could be 0.
    text_trans: applied to each prettified element before scoring.

    Returns None when the element is optional and absent from at least one
    side (and the count check, if enabled, did not fail). In every other
    case a BadResult is returned -- including on success, where the message
    starts with "GOOD:".
    """
    legacy = get_element(html_arg['legacy_html'])
    ng = get_element(html_arg['ng_html'])

    if required:
        if len(ng) == 0 and len(legacy) == 0:
            missing_from = 'NG and Legacy'
        elif len(ng) == 0:
            missing_from = 'NG'
        elif len(legacy) == 0:
            missing_from = 'legacy'
        else:
            missing_from = None

        if missing_from is not None:
            return BadResult(
                html_arg['id'], name,
                f"Missing field {name} for {html_arg['id']} from {missing_from}"
            )

    if check_counts and (len(legacy) != len(ng)):
        # Attach the first element from each side (when any) as context.
        ng_ele_txt = ng[0].prettify() if ng else 'MISSING'
        legacy_ele_txt = legacy[0].prettify() if legacy else 'MISSING'

        return BadResult(
            html_arg['id'], name,
            f"bad counts for {name} for {html_arg['id']} ng: {len(ng)} legacy: {len(legacy)}",
            legacy_ele_txt, ng_ele_txt)

    if len(ng) == 0 or len(legacy) == 0:
        # Only reachable when `required` is false: a required element that
        # is missing on either side has already been reported above. An
        # optional element that is absent is not an error.
        return None

    ng_ele_txt = text_trans(ng[0].prettify())
    legacy_ele_txt = text_trans(legacy[0].prettify())
    sim = lev_similarity(ng_ele_txt, legacy_ele_txt)

    if sim < min_sim:
        msg = f"Elements did not meet min similarity of {min_sim}"
        return BadResult(html_arg['id'], name, msg, legacy_ele_txt,
                         ng_ele_txt, sim)

    msg = f"GOOD: Elements did meet min similarity of {min_sim}"
    return BadResult(html_arg['id'], name, msg, '', '', sim)
Esempio n. 7
0
 def call_it(fn: Callable[[html_arg_dict], BadResult]) -> BadResult:
     """
     Run one HTML comparison function, shielding the caller from exceptions.

     ``fn`` is called with ``html_args``, which comes from the enclosing
     scope. If it raises, a BadResult tagged 'run_compare_html' carrying the
     formatted traceback is returned instead.
     """
     # noinspection PyBroadException
     try:
         result = fn(html_args)
     except Exception:
         trace = traceback.format_exc()
         return BadResult(html_args['paper_id'], 'run_compare_html', trace)
     return result
Esempio n. 8
0
 def call_it(fn: Callable[[text_arg_dict], BadResult]) -> BadResult:
     """
     Run one text comparison function, shielding the caller from exceptions.

     ``fn`` is called with ``text_dict``. If it raises, a BadResult tagged
     'run_compare_response' carrying the formatted traceback is returned.

     NOTE(review): the paper id for the failure result is read from
     ``res_args`` while ``fn`` receives ``text_dict``; both names come from
     the enclosing scope, which is not visible here -- confirm this is
     intended.
     """
     # noinspection PyBroadException
     try:
         result = fn(text_dict)
     except Exception:
         return BadResult(res_args['paper_id'], 'run_compare_response',
                          traceback.format_exc())
     else:
         return result