def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib, in_repo):
  """Create samples in sample_dir from the sources in udhr_dir,
  based on the bcp_to_code mapping.  Stage if sample_dir is in the
  repo.

  For every bcp -> (code, attrib) entry, the sample is extracted from
  'udhr_<code>.xml' in udhr_dir, passed through fix_sample, and written as
  utf8 to '<bcp>.txt' in sample_dir.  An 'attributions.txt' file listing
  '<bcp>: <attrib>' for each sample created is written to sample_dir as well.

  Processing stops at the first source that yields no sample: a message is
  printed and the function returns immediately, without writing
  attributions.txt and without staging.  Samples written before that point
  are left in place.

  Args:
    sample_dir: directory to write samples to; the value returned by
      tool_utils.ensure_dir_exists(sample_dir) is used for all output paths.
    udhr_dir: directory holding the 'udhr_<code>.xml' source files.
    bcp_to_code_attrib: mapping from bcp tag to a (code, attrib) pair.
    in_repo: if true, sample_dir is checked with tool_utils.git_is_clean
      before writing and passed to tool_utils.git_add_all afterwards.

  Raises:
    ValueError: if in_repo is true, sample_dir already exists, and
      tool_utils.git_is_clean reports that it is not clean.
  """

  tool_utils.check_dir_exists(udhr_dir)

  if in_repo and os.path.isdir(sample_dir) and not tool_utils.git_is_clean(sample_dir):
    raise ValueError('Please clean %s.' % sample_dir)

  comments = [
    '# Attributions for sample excerpts:',
    '#   original - in the public domain, no attribution',
    '#   UN - UN, OHCHR, or affiliate, attribute to UN',
    '#   other - not a UN translation',
    '#   none - not on ohchr, not a UN translation'
  ]
  sample_attrib_list = []
  sample_dir = tool_utils.ensure_dir_exists(sample_dir)
  count = 0
  # items() (not iteritems()) so the loop runs on both Python 2 and Python 3.
  for bcp, (code, attrib) in bcp_to_code_attrib.items():
    src_file = 'udhr_%s.xml' % code
    dst_file = '%s.txt' % bcp
    src_path = os.path.join(udhr_dir, src_file)
    dst_path = os.path.join(sample_dir, dst_file)
    sample = extract_para(src_path)
    if sample:
      # Only fix up text that was actually extracted; a missing sample must
      # not be handed to fix_sample.
      sample = fix_sample(sample, bcp)
    if not sample:
      print('unable to get sample from %s' % src_file)
      return

    with codecs.open(dst_path, 'w', 'utf8') as f:
      f.write(sample)
    print('created sample %s from %s' % (dst_file, src_file))
    sample_attrib_list.append('%s: %s' % (bcp, attrib))
    count += 1
  print('Created %d samples' % count)

  # Some existing samples that we don't overwrite are not in bcp_to_code_attrib,
  # so they're not listed.  Readers of the attributions.txt file will need to
  # default these to 'none'.
  # The file is terminated with a newline so it is a well-formed text file.
  attrib_data = '\n'.join(comments + sorted(sample_attrib_list)) + '\n'
  with open(os.path.join(sample_dir, 'attributions.txt'), 'w') as f:
    f.write(attrib_data)

  if in_repo:
    tool_utils.git_add_all(sample_dir)

  # Summary line; the destination is only mentioned when it is outside the repo.
  date = datetime.datetime.now().strftime('%Y-%m-%d')
  dst = 'in %s ' % sample_dir if not in_repo else ''
  print('Update sample files %sfrom %s as of %s.' % (dst, udhr_dir, date))
# Example #2
0
def fetch_udhr(fetch_dir):
  """Fetch UDHR xml bundle from unicode.org to fetch_dir.

  Downloads UDHR_XML_ZIP_URL to a file named UDHR_XML_ZIP_NAME inside
  fetch_dir and prints the local path of the downloaded file.

  Args:
    fetch_dir: directory to download into; the value returned by
      tool_utils.ensure_dir_exists(fetch_dir) is used as the destination.
  """
  # urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2;
  # import it locally so the function works under either interpreter.
  try:
    from urllib.request import urlretrieve
  except ImportError:
    from urllib import urlretrieve

  fetch_dir = tool_utils.ensure_dir_exists(fetch_dir)
  dstfile = os.path.join(fetch_dir, UDHR_XML_ZIP_NAME)
  # urlretrieve returns a (local_filename, headers) pair.
  local_path, _ = urlretrieve(UDHR_XML_ZIP_URL, dstfile)
  print('Fetched: ' + local_path)
# Example #3
0
def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib, in_repo):
  """Create samples in sample_dir from the sources in udhr_dir,
  based on the bcp_to_code mapping.  Stage if sample_dir is in the
  repo.  If sample_dir is in the repo, don't overwrite samples whose
  most recent log entry does not start with 'Updated by tool'.

  For every bcp -> (code, attrib) entry, the sample is extracted from
  'udhr_<code>.xml' in udhr_dir and, unless the destination is protected
  from overwriting, passed through fix_sample and written as utf8 to
  '<bcp>_udhr.txt' in sample_dir.  An 'attributions.txt' file listing
  '<bcp>_udhr.txt: <attrib>' for every entry processed (written or not) is
  written to sample_dir as well.

  Processing stops at the first source that yields no sample: a message is
  printed and the function returns immediately, without writing
  attributions.txt and without staging.  Samples written before that point
  are left in place.

  Args:
    sample_dir: directory to write samples to; the value returned by
      tool_utils.ensure_dir_exists(sample_dir) is used for all output paths.
    udhr_dir: directory holding the 'udhr_<code>.xml' source files.
    bcp_to_code_attrib: mapping from bcp tag to a (code, attrib) pair.
    in_repo: if true, sample_dir is checked with tool_utils.git_is_clean
      before writing, existing files not reported by
      tool_utils.get_tool_generated are left untouched, and sample_dir is
      passed to tool_utils.git_add_all afterwards.

  Raises:
    ValueError: if in_repo is true, sample_dir already exists, and
      tool_utils.git_is_clean reports that it is not clean.
  """

  tool_utils.check_dir_exists(udhr_dir)

  if in_repo and os.path.isdir(sample_dir) and not tool_utils.git_is_clean(sample_dir):
    raise ValueError('Please clean %s.' % sample_dir)

  # Names of the files that may be overwritten; only consulted when in_repo.
  tool_samples = frozenset()
  if in_repo:
    repo, subdir = os.path.split(sample_dir)
    tool_samples = frozenset(tool_utils.get_tool_generated(repo, subdir))
    print('only allowing overwrite of:\n  %s' % '\n  '.join(sorted(tool_samples)))

  comments = [
    '# Attributions for sample excerpts:',
    '#   original - in the public domain, no attribution',
    '#   UN - UN, OHCHR, or affiliate, attribute to UN',
    '#   other - not a UN translation',
    '#   none - not on ohchr, not a UN translation'
  ]
  sample_attrib_list = []
  sample_dir = tool_utils.ensure_dir_exists(sample_dir)
  count = 0
  # items() (not iteritems()) so the loop runs on both Python 2 and Python 3.
  for bcp, (code, attrib) in bcp_to_code_attrib.items():
    src_file = 'udhr_%s.xml' % code
    dst_file = '%s_udhr.txt' % bcp
    src_path = os.path.join(udhr_dir, src_file)
    dst_path = os.path.join(sample_dir, dst_file)
    sample = extract_para(src_path)
    if not sample:
      print('unable to get sample from %s' % src_file)
      return
    if in_repo and os.path.isfile(dst_path) and dst_file not in tool_samples:
      # An existing file that is not in the tool-generated set is kept as is.
      print('Not overwriting modified file %s' % dst_file)
    else:
      sample = fix_sample(sample, bcp)
      with codecs.open(dst_path, 'w', 'utf8') as f:
        f.write(sample)
      print('created sample %s from %s' % (dst_file, src_file))
      count += 1
    # The attribution is recorded whether or not the file was rewritten.
    sample_attrib_list.append('%s: %s' % (dst_file, attrib))
  print('Created %d samples' % count)

  # Some existing samples that we don't overwrite are not in bcp_to_code_attrib,
  # so they're not listed.  Readers of the attributions.txt file will need to
  # default these to 'none'.
  attrib_data = '\n'.join(comments + sorted(sample_attrib_list)) + '\n'
  with open(os.path.join(sample_dir, 'attributions.txt'), 'w') as f:
    f.write(attrib_data)

  if in_repo:
    tool_utils.git_add_all(sample_dir)

  date = datetime.datetime.now().strftime('%Y-%m-%d')
  dst = 'in %s ' % sample_dir if not in_repo else ''
  # Report the source path starting at 'nototools' when that appears in it.
  noto_ix = udhr_dir.find('nototools')
  src = udhr_dir if noto_ix == -1 else udhr_dir[noto_ix:]

  # prefix of this sample commit message indicates that these were tool-generated
  print('Updated by tool - sample files %sfrom %s as of %s.' % (dst, src, date))