def quantify(analysis):
    """Quantify single naming.

    A proper noun is counted as "single" when neither the token before it
    nor the token after it is also a proper noun. The first and last tokens
    are treated as having a non-proper-noun neighbour outside the text.

    Args:
        analysis: Object whose ``pos_tags()`` returns a list of tagged
            tokens accepted by ``is_proper_noun`` (presumably
            ``(token, tag)`` pairs -- confirm against the caller).

    Returns:
        dict: ``{"single_naming": ratio}`` where ``ratio`` is the number of
        isolated proper nouns divided by the number of tagged tokens, or
        ``0.0`` when there are no tagged tokens.
    """
    pos_tags = analysis.pos_tags()
    if not pos_tags:
        # Nothing was tagged: report 0.0 instead of dividing by zero.
        return {"single_naming": 0.0}

    # Classify every token exactly once, then pad both ends with False so
    # the first and last tokens have well-defined neighbours without ever
    # passing a fake sentinel token to is_proper_noun.
    flags = [False] + [is_proper_noun(tagged) for tagged in pos_tags] + [False]

    count = sum(
        1
        for before, current, after in zip(flags, flags[1:], flags[2:])
        if current and not before and not after
    )
    return {"single_naming": float(count) / len(pos_tags)}
def quantify(analysis):
    """Quantify single naming.

    Counts proper nouns that stand alone, i.e. whose immediate left and
    right neighbours are not proper nouns, and normalises that count by the
    total number of tagged tokens. Tokens at either end of the text are
    treated as if the missing neighbour were not a proper noun.

    Args:
        analysis: Object whose ``pos_tags()`` returns a list of tagged
            tokens accepted by ``is_proper_noun`` (presumably
            ``(token, tag)`` pairs -- confirm against the caller).

    Returns:
        dict: ``{"single_naming": ratio}``; ``ratio`` is ``0.0`` when the
        text has no tagged tokens.
    """
    tagged_tokens = analysis.pos_tags()
    total = len(tagged_tokens)
    if total == 0:
        # Avoid ZeroDivisionError on an empty text.
        return {"single_naming": 0.0}

    # One is_proper_noun call per token; False padding stands in for the
    # non-existent neighbours before the first and after the last token.
    is_pn = [False]
    is_pn.extend(is_proper_noun(tagged) for tagged in tagged_tokens)
    is_pn.append(False)

    isolated = 0
    for left, here, right in zip(is_pn, is_pn[1:], is_pn[2:]):
        if here and not left and not right:
            isolated += 1

    return {"single_naming": float(isolated) / total}
def quantify(analysis):
    """Quantify explicit naming.

    Computes three times the ratio of pronoun tokens to proper-noun tokens.

    Args:
        analysis: Object providing ``pos_tags()`` (tagged tokens accepted by
            ``is_proper_noun``) and ``tokens()`` (tokens that are looked up
            in ``PRONOUNS_LIST``).

    Returns:
        dict: ``{"explicit_naming": value}``. When the text contains no
        proper nouns the ratio is undefined and ``0.0`` is reported instead
        of raising ZeroDivisionError.
    """
    proper_nouns = sum(
        1 for tagged in analysis.pos_tags() if is_proper_noun(tagged)
    )
    if proper_nouns == 0:
        # No denominator: fall back to 0.0 rather than crashing.
        return {"explicit_naming": 0.0}

    pronouns = sum(1 for token in analysis.tokens() if token in PRONOUNS_LIST)
    return {"explicit_naming": 3.0 * (float(pronouns) / proper_nouns)}
def quantify(analysis):
    """Quantify mean multiple naming.

    Computes the average length, in tokens, of maximal runs of consecutive
    proper nouns in the tagged text.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``) and
            ``pos_tags()``, whose items are passed to the language-specific
            proper-noun predicate.

    Returns:
        dict: ``{"mean_multiple_naming": value}`` where ``value`` is the
        number of proper-noun tokens divided by the number of proper-noun
        runs, or ``0.0`` when the text contains no proper nouns.

    Raises:
        SystemExit: After printing a message, when ``analysis.lang`` is not
            a supported language.
    """
    if analysis.lang == 'en':
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        print('language "{}" not implemented yet for mean multiple naming'.format(
            analysis.lang))
        # Raise directly instead of calling exit(), which is only injected
        # by the site module and is not guaranteed to exist.
        raise SystemExit

    num_proper_noun_runs = 0
    num_proper_noun_tokens = 0
    currently_in_run = False

    for token in analysis.pos_tags():
        if is_proper_noun(token):
            if not currently_in_run:
                # Count a run when it starts, so that a run reaching the
                # end of the text is still counted.
                num_proper_noun_runs += 1
                currently_in_run = True
            num_proper_noun_tokens += 1
        else:
            currently_in_run = False

    if num_proper_noun_runs == 0:
        result = 0.0
    else:
        result = float(num_proper_noun_tokens) / num_proper_noun_runs

    return {"mean_multiple_naming": result}
def quantify(analysis):
    """Quantify explicit naming.

    The value is ``3.0 * pronouns / proper_nouns``, where ``pronouns`` is
    the number of tokens found in ``PRONOUNS_LIST`` and ``proper_nouns`` is
    the number of tagged tokens for which ``is_proper_noun`` is true.

    Args:
        analysis: Object providing ``pos_tags()`` and ``tokens()``.

    Returns:
        dict: ``{"explicit_naming": value}``; ``value`` is ``0.0`` when the
        text has no proper nouns, because the ratio is undefined there.
    """
    proper_noun_count = 0
    for tagged in analysis.pos_tags():
        if is_proper_noun(tagged):
            proper_noun_count += 1

    if not proper_noun_count:
        # Guard the division below against a zero denominator.
        return {"explicit_naming": 0.0}

    pronoun_count = 0
    for token in analysis.tokens():
        if token in PRONOUNS_LIST:
            pronoun_count += 1

    return {"explicit_naming": 3.0 * (float(pronoun_count) / proper_noun_count)}
def quantify(analysis):
    """Quantify single naming.

    A proper noun is "single" when neither adjacent token is a proper noun.
    The result is the number of such tokens divided by the number of tagged
    tokens. The first and last tokens are treated as having a
    non-proper-noun neighbour outside the text.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``) and
            ``pos_tags()``, which returns a list of tagged tokens passed to
            the language-specific proper-noun predicate.

    Returns:
        dict: ``{"single_naming": ratio}``; ``ratio`` is ``0.0`` when there
        are no tagged tokens.

    Raises:
        SystemExit: After printing a message, when ``analysis.lang`` is not
            a supported language.
    """
    if analysis.lang == 'en':
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        print('language "{}" not implemented yet for single naming'.format(
            analysis.lang))
        # Raise directly instead of calling exit(), which is only injected
        # by the site module and is not guaranteed to exist.
        raise SystemExit

    pos_tags = analysis.pos_tags()
    if not pos_tags:
        # Nothing was tagged: report 0.0 instead of dividing by zero.
        return {"single_naming": 0.0}

    # Evaluate the predicate once per token; the False padding supplies the
    # missing neighbours at both ends without feeding a sentinel token to
    # the predicate.
    flags = [False] + [is_proper_noun(tagged) for tagged in pos_tags] + [False]

    count = sum(
        1
        for before, current, after in zip(flags, flags[1:], flags[2:])
        if current and not before and not after
    )
    return {"single_naming": float(count) / len(pos_tags)}
def quantify(analysis):
    """Quantify explicit naming.

    Computes three times the ratio of pronoun tokens to proper-noun tokens,
    using the pronoun collection and proper-noun predicate that match
    ``analysis.lang``.

    Args:
        analysis: Object exposing ``lang`` (``'en'`` or ``'zh'``),
            ``pos_tags()`` and ``tokens()``.

    Returns:
        dict: ``{"explicit_naming": value}``; ``value`` is ``0.0`` when the
        text has no proper nouns, because the ratio is undefined there.

    Raises:
        SystemExit: After printing a message, when ``analysis.lang`` is not
            a supported language.
    """
    if analysis.lang == 'en':
        from translationese.pronouns import PRONOUNS as PRONOUNS_LIST
        from translationese.utils import is_proper_noun
    elif analysis.lang == 'zh':
        from translationese.pronouns import PRONOUNS_ZH as PRONOUNS_LIST
        from translationese.utils import is_proper_noun_zh as is_proper_noun
    else:
        print('language "{}" not implemented yet for explicit naming'.format(
            analysis.lang))
        # Raise directly instead of calling exit(), which is only injected
        # by the site module and is not guaranteed to exist.
        raise SystemExit

    proper_nouns = sum(
        1 for tagged in analysis.pos_tags() if is_proper_noun(tagged)
    )
    if proper_nouns == 0:
        # No denominator: fall back to 0.0 rather than crashing.
        return {"explicit_naming": 0.0}

    pronouns = sum(1 for token in analysis.tokens() if token in PRONOUNS_LIST)
    return {"explicit_naming": 3.0 * (float(pronouns) / proper_nouns)}
def quantify(analysis):
    """Quantify mean multiple naming.

    Tags ``analysis.case_tokens()`` with ``nltk.pos_tag`` and computes the
    average length, in tokens, of maximal runs of consecutive proper nouns.

    Args:
        analysis: Object whose ``case_tokens()`` returns the tokens to tag.

    Returns:
        dict: ``{"mean_multiple_naming": value}`` where ``value`` is the
        number of proper-noun tokens divided by the number of proper-noun
        runs, or ``0.0`` when the text contains no proper nouns.
    """
    num_proper_noun_runs = 0
    num_proper_noun_tokens = 0
    currently_in_run = False

    pos_tags = nltk.pos_tag(analysis.case_tokens())

    for token in pos_tags:
        if is_proper_noun(token):
            if not currently_in_run:
                # Count a run when it starts, so that a run reaching the
                # end of the text is still counted.
                num_proper_noun_runs += 1
                currently_in_run = True
            num_proper_noun_tokens += 1
        else:
            currently_in_run = False

    if num_proper_noun_runs == 0:
        result = 0.0
    else:
        result = float(num_proper_noun_tokens) / num_proper_noun_runs

    return {"mean_multiple_naming": result}