Example #1
0
def quantify(analysis):
    """Quantify single naming.

    Counts the proper nouns that stand alone -- neither the preceding nor
    the following tag is a proper noun -- and normalises that count by the
    total number of tags.

    Args:
        analysis: Object exposing ``pos_tags()``, which returns a list of
            tagged tokens accepted by ``is_proper_noun`` (presumably
            ``(word, tag)`` pairs -- confirm against the caller).

    Returns:
        dict: ``{"single_naming": ratio}``, where ``ratio`` is the number
        of isolated proper nouns divided by the number of tags, or ``0.0``
        when there are no tags at all.
    """
    tags = analysis.pos_tags()
    if not tags:
        # Nothing to normalise by; avoid a ZeroDivisionError on empty input.
        return {"single_naming": 0.0}

    # Pad both ends so the first and last real tags have a neighbour.
    edge_marker = [('', '')]
    padded = edge_marker + tags + edge_marker

    # Slide a three-wide window over the padded list. Only real tags ever
    # occupy the centre position, so the padding itself is never counted
    # and no index can run past the end of the list.
    count = sum(
        1
        for before, current, after in zip(padded, padded[1:], padded[2:])
        if is_proper_noun(current)
        and not is_proper_noun(before)
        and not is_proper_noun(after)
    )

    return {"single_naming": float(count) / len(tags)}
Example #2
0
def quantify(analysis):
    """Quantify single naming.

    A proper noun is "single" when neither of its immediate neighbours is
    a proper noun. The result is the share of such tags among all tags.

    Args:
        analysis: Object exposing ``pos_tags()``, which returns a list of
            tagged tokens accepted by ``is_proper_noun`` (presumably
            ``(word, tag)`` pairs -- confirm against the caller).

    Returns:
        dict: ``{"single_naming": ratio}``; ``ratio`` is ``0.0`` when the
        analysis yields no tags.
    """
    real_tags = analysis.pos_tags()
    num_tags = len(real_tags)
    if num_tags == 0:
        # An empty text has no single namings; dividing would raise.
        return {"single_naming": 0.0}

    # The edge markers give the first and last tag a non-proper-noun
    # neighbour to be compared against.
    edge_marker = [('', '')]
    pos_tags = edge_marker + real_tags + edge_marker

    count = 0
    # Visit only the real tags (indices 1 .. num_tags); the markers at both
    # ends are neighbours only, never candidates.
    for i in range(1, num_tags + 1):
        if not is_proper_noun(pos_tags[i]):
            continue
        if is_proper_noun(pos_tags[i - 1]) or is_proper_noun(pos_tags[i + 1]):
            continue
        count += 1

    return {"single_naming": float(count) / num_tags}
def quantify(analysis):
    """Quantify explicit naming.

    Computes three times the ratio of pronoun tokens to proper-noun tags.

    Args:
        analysis: Object exposing ``pos_tags()`` (tagged tokens accepted by
            ``is_proper_noun``) and ``tokens()`` (tokens comparable with the
            entries of ``PRONOUNS_LIST``).

    Returns:
        dict: ``{"explicit_naming": 3.0 * pronouns / proper_nouns}``. When
        the text contains no proper nouns the ratio is undefined and
        ``0.0`` is returned instead of raising ``ZeroDivisionError``.
    """
    proper_nouns = sum(1 for x in analysis.pos_tags() if is_proper_noun(x))
    pronouns = sum(1 for x in analysis.tokens() if x in PRONOUNS_LIST)

    if proper_nouns == 0:
        # No denominator: report a neutral value rather than crashing.
        return {"explicit_naming": 0.0}

    return {"explicit_naming": 3.0 * (float(pronouns) / proper_nouns)}
def quantify(analysis):
    """Quantify mean multiple naming.

    A "run" is a maximal sequence of consecutive proper-noun tags. The
    result is the average run length: proper-noun tags divided by runs.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``) and
            ``pos_tags()``, an iterable of tagged tokens accepted by the
            language-specific proper-noun predicate.

    Returns:
        dict: ``{"mean_multiple_naming": mean_run_length}``, with ``0.0``
        when the text contains no proper nouns.

    Raises:
        SystemExit: If ``analysis.lang`` is not supported. The message is
            written to stderr and the exit status is non-zero.
    """
    import sys

    if analysis.lang == 'en':
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        # sys.exit with a message reports on stderr with status 1; the
        # interactive ``exit()`` helper would have exited with status 0.
        sys.exit(
            'language "{}" not implemented yet for mean multiple naming'
            .format(analysis.lang))

    num_proper_noun_runs = 0
    num_proper_noun_tokens = 0
    currently_in_run = False

    for token in analysis.pos_tags():
        if is_proper_noun(token):
            currently_in_run = True
            num_proper_noun_tokens += 1
        elif currently_in_run:
            # Run just ended
            currently_in_run = False
            num_proper_noun_runs += 1

    if currently_in_run:
        # The text ended in the middle of a run; it still counts as one.
        num_proper_noun_runs += 1

    if num_proper_noun_runs == 0:
        result = 0.0
    else:
        result = float(num_proper_noun_tokens) / num_proper_noun_runs

    return {"mean_multiple_naming": result}
def quantify(analysis):
    """Quantify explicit naming.

    The feature is three times the number of pronoun tokens divided by the
    number of proper-noun tags.

    Args:
        analysis: Object exposing ``pos_tags()`` (tagged tokens accepted by
            ``is_proper_noun``) and ``tokens()`` (tokens comparable with the
            entries of ``PRONOUNS_LIST``).

    Returns:
        dict: ``{"explicit_naming": value}``. ``value`` is ``0.0`` when no
        proper noun is present, since the ratio has no denominator then.
    """
    num_proper_nouns = 0
    for tagged in analysis.pos_tags():
        if is_proper_noun(tagged):
            num_proper_nouns += 1

    num_pronouns = 0
    for token in analysis.tokens():
        if token in PRONOUNS_LIST:
            num_pronouns += 1

    if not num_proper_nouns:
        # Guard against ZeroDivisionError on texts without proper nouns.
        return {"explicit_naming": 0.0}

    return {"explicit_naming": 3.0 * (float(num_pronouns) / num_proper_nouns)}
def quantify(analysis):
    """Quantify single naming.

    Counts the proper nouns whose immediate neighbours are both not proper
    nouns and divides by the total number of tags.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``) and
            ``pos_tags()``, which returns a list of tagged tokens accepted
            by the language-specific proper-noun predicate (presumably
            ``(word, tag)`` pairs -- confirm against the caller).

    Returns:
        dict: ``{"single_naming": ratio}``, with ``0.0`` when there are no
        tags at all.

    Raises:
        SystemExit: If ``analysis.lang`` is not supported. The message is
            written to stderr and the exit status is non-zero.
    """
    import sys

    if analysis.lang == 'en':
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        # sys.exit with a message reports on stderr with status 1; the
        # interactive ``exit()`` helper would have exited with status 0.
        sys.exit('language "{}" not implemented yet for single naming'.format(
            analysis.lang))

    tags = analysis.pos_tags()
    if not tags:
        # Nothing to normalise by; avoid a ZeroDivisionError on empty input.
        return {"single_naming": 0.0}

    # Pad both ends so the first and last real tags have a neighbour.
    edge_marker = [('', '')]
    padded = edge_marker + tags + edge_marker

    # Three-wide sliding window; only real tags occupy the centre slot.
    count = sum(
        1
        for before, current, after in zip(padded, padded[1:], padded[2:])
        if is_proper_noun(current)
        and not is_proper_noun(before)
        and not is_proper_noun(after)
    )

    return {"single_naming": float(count) / len(tags)}
Example #7
0
def quantify(analysis):
    """Quantify explicit naming.

    Computes three times the ratio of pronoun tokens to proper-noun tags,
    using the pronoun list and proper-noun predicate for ``analysis.lang``.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``),
            ``pos_tags()`` and ``tokens()``.

    Returns:
        dict: ``{"explicit_naming": 3.0 * pronouns / proper_nouns}``, or
        ``0.0`` when the text contains no proper nouns (the ratio would
        otherwise raise ``ZeroDivisionError``).

    Raises:
        SystemExit: If ``analysis.lang`` is not supported. The message is
            written to stderr and the exit status is non-zero.
    """
    import sys

    if analysis.lang == 'en':
        from translationese.pronouns import PRONOUNS as PRONOUNS_LIST
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.pronouns import PRONOUNS_ZH as PRONOUNS_LIST
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        # sys.exit with a message reports on stderr with status 1; the
        # interactive ``exit()`` helper would have exited with status 0.
        sys.exit('language "{}" not implemented yet for explicit naming'.format(
            analysis.lang))

    proper_nouns = sum(1 for x in analysis.pos_tags() if is_proper_noun(x))
    pronouns = sum(1 for x in analysis.tokens() if x in PRONOUNS_LIST)

    if proper_nouns == 0:
        # No denominator: report a neutral value rather than crashing.
        return {"explicit_naming": 0.0}

    return {"explicit_naming": 3.0 * (float(pronouns) / proper_nouns)}
Example #8
0
def quantify(analysis):
    """Quantify mean multiple naming.

    A "run" is a maximal sequence of consecutive proper-noun tags. The
    result is the average run length: proper-noun tags divided by runs.

    Args:
        analysis: Object exposing ``case_tokens()``, whose result is passed
            to ``nltk.pos_tag`` to obtain the tagged tokens.

    Returns:
        dict: ``{"mean_multiple_naming": mean_run_length}``, with ``0.0``
        when the text contains no proper nouns.
    """
    num_proper_noun_runs = 0
    num_proper_noun_tokens = 0
    currently_in_run = False

    pos_tags = nltk.pos_tag(analysis.case_tokens())

    for token in pos_tags:
        if is_proper_noun(token):
            currently_in_run = True
            num_proper_noun_tokens += 1
        elif currently_in_run:
            # Run just ended
            currently_in_run = False
            num_proper_noun_runs += 1

    if currently_in_run:
        # The text ended in the middle of a run; it still counts as one.
        # Without this, a text ending in a proper noun under-counts runs
        # (and reports 0 if that was its only run).
        num_proper_noun_runs += 1

    if num_proper_noun_runs == 0:
        result = 0.0
    else:
        result = float(num_proper_noun_tokens) / num_proper_noun_runs

    return {"mean_multiple_naming": result}