def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib, in_repo):
    """Create samples in sample_dir from the sources in udhr_dir.

    Each entry of the bcp_to_code mapping produces one '<bcp>.txt' sample
    extracted from 'udhr_<code>.xml'.  An 'attributions.txt' file listing the
    attribution of every written sample is stored next to the samples.
    Stage if sample_dir is in the repo.

    Args:
      sample_dir: directory receiving the samples; it is passed through
        tool_utils.ensure_dir_exists and the returned path is used.
      udhr_dir: directory holding the UDHR xml sources; checked with
        tool_utils.check_dir_exists before anything is written.
      bcp_to_code_attrib: dict mapping a bcp tag to a (code, attrib) pair.
      in_repo: when true, sample_dir must be clean according to
        tool_utils.git_is_clean, and its contents are added with
        tool_utils.git_add_all once the samples are written.

    Raises:
      ValueError: if in_repo is set and sample_dir exists but is not clean.

    Note that the first source that yields no sample aborts the whole run:
    samples written so far stay on disk, but attributions.txt is not written
    and nothing is staged.
    """
    tool_utils.check_dir_exists(udhr_dir)

    if (in_repo and os.path.isdir(sample_dir)
            and not tool_utils.git_is_clean(sample_dir)):
        raise ValueError('Please clean %s.' % sample_dir)

    comments = [
        '# Attributions for sample excerpts:',
        '# original - in the public domain, no attribution',
        '# UN - UN, OHCHR, or affiliate, attribute to UN',
        '# other - not a UN translation',
        '# none - not on ohchr, not a UN translation'
    ]
    sample_attrib_list = []
    sample_dir = tool_utils.ensure_dir_exists(sample_dir)
    count = 0
    for bcp, (code, attrib) in bcp_to_code_attrib.iteritems():
        src_file = 'udhr_%s.xml' % code
        dst_file = '%s.txt' % bcp
        src_path = os.path.join(udhr_dir, src_file)
        dst_path = os.path.join(sample_dir, dst_file)

        sample = extract_para(src_path)
        # Only post-process when extraction produced something, so a failed
        # extraction is reported below instead of being handed to fix_sample.
        if sample:
            sample = fix_sample(sample, bcp)
        if not sample:
            print('unable to get sample from %s' % src_file)
            return

        with codecs.open(dst_path, 'w', 'utf8') as f:
            f.write(sample)
        print('created sample %s from %s' % (dst_file, src_file))
        sample_attrib_list.append('%s: %s' % (bcp, attrib))
        count += 1
    print('Created %d samples' % count)

    # Some existing samples that we don't overwrite are not in
    # bcp_to_code_attrib, so they're not listed.  Readers of the
    # attributions.txt file will need to default these to 'none'.
    attrib_data = '\n'.join(comments + sorted(sample_attrib_list))
    with open(os.path.join(sample_dir, 'attributions.txt'), 'w') as f:
        f.write(attrib_data)

    if in_repo:
        tool_utils.git_add_all(sample_dir)

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    # The destination is only mentioned when the samples are outside the repo.
    dst = 'in %s ' % sample_dir if not in_repo else ''
    print('Update sample files %sfrom %s as of %s.' % (dst, udhr_dir, date))
def fetch_udhr(fetch_dir):
    """Fetch UDHR xml bundle from unicode.org to fetch_dir.

    fetch_dir is passed through tool_utils.ensure_dir_exists and the returned
    path is used.  The bundle at UDHR_XML_ZIP_URL is stored there under the
    name UDHR_XML_ZIP_NAME, and the local file name reported by the download
    is printed.
    """
    target_dir = tool_utils.ensure_dir_exists(fetch_dir)
    zip_path = os.path.join(target_dir, UDHR_XML_ZIP_NAME)
    # urlretrieve returns a (local filename, headers) pair; only the name
    # is reported.
    local_name, _ = urllib.urlretrieve(UDHR_XML_ZIP_URL, zip_path)
    print('Fetched: ' + local_name)
def update_samples(sample_dir, udhr_dir, bcp_to_code_attrib, in_repo):
    """Create samples in sample_dir from the sources in udhr_dir.

    Each entry of the bcp_to_code mapping produces one '<bcp>_udhr.txt'
    sample extracted from 'udhr_<code>.xml'.  An 'attributions.txt' file
    listing the attribution of every processed sample is stored next to the
    samples.  Stage if sample_dir is in the repo.  If sample_dir is in the
    repo, don't overwrite samples whose most recent log entry does not start
    with 'Updated by tool' (the set of overwritable files comes from
    tool_utils.get_tool_generated).

    Args:
      sample_dir: directory receiving the samples; it is passed through
        tool_utils.ensure_dir_exists and the returned path is used.
      udhr_dir: directory holding the UDHR xml sources; checked with
        tool_utils.check_dir_exists before anything is written.
      bcp_to_code_attrib: dict mapping a bcp tag to a (code, attrib) pair.
      in_repo: when true, sample_dir must be clean according to
        tool_utils.git_is_clean, existing files outside the tool-generated
        set are left untouched, and the directory contents are added with
        tool_utils.git_add_all at the end.

    Raises:
      ValueError: if in_repo is set and sample_dir exists but is not clean.
    """
    tool_utils.check_dir_exists(udhr_dir)

    if (in_repo and os.path.isdir(sample_dir)
            and not tool_utils.git_is_clean(sample_dir)):
        raise ValueError('Please clean %s.' % sample_dir)

    # Always bound, so the overwrite test below never depends on
    # short-circuit evaluation to avoid an undefined name.
    tool_samples = frozenset()
    if in_repo:
        # NOTE(review): os.path.split yields an empty subdir when sample_dir
        # ends with a path separator -- confirm callers never pass one.
        repo, subdir = os.path.split(sample_dir)
        tool_samples = frozenset(tool_utils.get_tool_generated(repo, subdir))
        print('only allowing overwrite of:\n %s'
              % '\n '.join(sorted(tool_samples)))

    comments = [
        '# Attributions for sample excerpts:',
        '# original - in the public domain, no attribution',
        '# UN - UN, OHCHR, or affiliate, attribute to UN',
        '# other - not a UN translation',
        '# none - not on ohchr, not a UN translation'
    ]
    sample_attrib_list = []
    sample_dir = tool_utils.ensure_dir_exists(sample_dir)
    count = 0
    for bcp, (code, attrib) in bcp_to_code_attrib.iteritems():
        src_file = 'udhr_%s.xml' % code
        dst_file = '%s_udhr.txt' % bcp
        src_path = os.path.join(udhr_dir, src_file)
        dst_path = os.path.join(sample_dir, dst_file)

        sample = extract_para(src_path)
        if not sample:
            # NOTE(review): this aborts the whole run -- attributions.txt is
            # not written and nothing is staged.  Confirm that stopping here
            # (rather than skipping this entry) is intended.
            print('unable to get sample from %s' % src_file)
            return

        keep_existing = (in_repo and os.path.isfile(dst_path)
                         and dst_file not in tool_samples)
        if keep_existing:
            print('Not overwriting modified file %s' % dst_file)
        else:
            sample = fix_sample(sample, bcp)
            with codecs.open(dst_path, 'w', 'utf8') as f:
                f.write(sample)
            print('created sample %s from %s' % (dst_file, src_file))
            count += 1
        # The attribution is recorded whether or not the file was rewritten.
        sample_attrib_list.append('%s: %s' % (dst_file, attrib))
    print('Created %d samples' % count)

    # Some existing samples that we don't overwrite are not in
    # bcp_to_code_attrib, so they're not listed.  Readers of the
    # attributions.txt file will need to default these to 'none'.
    attrib_data = '\n'.join(comments + sorted(sample_attrib_list)) + '\n'
    with open(os.path.join(sample_dir, 'attributions.txt'), 'w') as f:
        f.write(attrib_data)

    if in_repo:
        tool_utils.git_add_all(sample_dir)

    date = datetime.datetime.now().strftime('%Y-%m-%d')
    # The destination is only mentioned when the samples are outside the repo.
    dst = 'in %s ' % sample_dir if not in_repo else ''
    # Report the source path starting at 'nototools' when that name occurs in
    # it, otherwise in full.
    noto_ix = udhr_dir.find('nototools')
    src = udhr_dir if noto_ix == -1 else udhr_dir[noto_ix:]

    # prefix of this sample commit message indicates that these were
    # tool-generated
    print('Updated by tool - sample files %sfrom %s as of %s.'
          % (dst, src, date))