def main():
    """Parse a messages.js-style input file and emit translation JSON files.

    Collects '///' description comments, message definitions, and synonym
    definitions, then writes <lang>.json/keys.json/qqq.json via write_files()
    plus a synonyms.json mapping file.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json/',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='messages.js',
                        help='input file')
    args = parser.parse_args()

    # Read and parse input file.
    results = []
    synonyms = {}
    description = ''
    # Context manager closes the file even if parsing raises (the original
    # explicit close() was skipped on exception).
    with codecs.open(args.input_file, 'r', 'utf-8') as infile:
        for line in infile:
            if line.startswith('///'):
                # Consecutive '///' lines accumulate into one description.
                if description:
                    description = description + ' ' + line[3:].strip()
                else:
                    description = line[3:].strip()
            else:
                match = _INPUT_DEF_PATTERN.match(line)
                if match:
                    result = {}
                    result['meaning'] = match.group(1)
                    result['source'] = match.group(2)
                    if not description:
                        print('Warning: No description for ' + result['meaning'])
                    result['description'] = description
                    description = ''
                    results.append(result)
                else:
                    match = _INPUT_SYN_PATTERN.match(line)
                    if match:
                        if description:
                            # Synonyms take no description; flag the stray comment.
                            print('Warning: Description preceding definition of '
                                  'synonym {0}.'.format(match.group(1)))
                            description = ''
                        synonyms[match.group(1)] = match.group(2)

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    print("Wrote {0} synonym pairs to {1}.".format(
        len(synonyms), synonym_file_name))
def main():
    """Create translation JSON files from a messages.js-style input.

    Parses message definitions, synonyms, and '///' descriptions, then
    writes the language/keys/qqq JSON files and a synonyms.json file.
    """
    parser = argparse.ArgumentParser(description="Create translation files.")
    parser.add_argument(
        "--author",
        default="Ellen Spertus <*****@*****.**>",
        help="name and email address of contact for translators")
    parser.add_argument("--lang", default="en",
                        help="ISO 639-1 source language code")
    parser.add_argument("--output_dir", default="json",
                        help="relative directory for output files")
    parser.add_argument("--input_file", default="messages.js",
                        help="input file")
    parser.add_argument("--quiet", action="store_true", default=False,
                        help="only display warnings, not routine info")
    args = parser.parse_args()

    # Normalize the output directory to end with a path separator.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    results = []
    synonyms = {}
    description = ""
    infile = codecs.open(args.input_file, "r", "utf-8")
    for line in infile:
        # '///' lines accumulate into the description of the next message.
        if line.startswith("///"):
            text = line[3:].strip()
            description = (description + " " + text) if description else text
            continue
        def_match = _INPUT_DEF_PATTERN.match(line)
        if def_match:
            if not description:
                print("Warning: No description for " + def_match.group(1))
            results.append({
                "meaning": def_match.group(1),
                "source": def_match.group(2),
                "description": description,
            })
            description = ""
            continue
        syn_match = _INPUT_SYN_PATTERN.match(line)
        if syn_match:
            if description:
                # Synonyms carry no description; warn about the stray one.
                print("Warning: Description preceding definition of "
                      "synonym {0}.".format(syn_match.group(1)))
                description = ""
            synonyms[syn_match.group(1)] = syn_match.group(2)
    infile.close()

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     "synonyms.json")
    with open(synonym_file_name, "w") as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(
            len(synonyms), synonym_file_name))
def main():
    """Create translation files, merging Open Roberta messages if present.

    Parses the base input file via transform(), merges the optional
    robInput file on top, then writes the JSON output files.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='messages.js',
                        help='input file')
    parser.add_argument('--robInput_file', default='robMessages.js',
                        help='robInput file')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    description = ''
    infile = codecs.open(args.input_file, 'r', 'utf-8')
    transform(results, synonyms, description, infile)
    infile.close()

    # Merge the optional Open Roberta message file on top of the base set.
    if os.path.exists(args.robInput_file):
        print('Merged Open Roberta messages')
        infile = codecs.open(args.robInput_file, 'r', 'utf-8')
        transform(results, synonyms, description, infile)
        infile.close()
        # NOTE: removed leftover debug re-read of the rob file; its
        # `print fin.read()` was Python 2 syntax (a SyntaxError under
        # Python 3) and dumped the entire file to stdout.

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(
            len(synonyms), synonym_file_name))
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--xlf', help='file containing xlf definitions')
    parser.add_argument('--templates', default=['template.soy'], nargs='+',
                        help='relative path to Soy templates, comma or space '
                        'separated (used for ordering messages)')
    # args is global so helpers elsewhere in the file can read the options.
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(','):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(' '.join(line.strip() for line in myfile))
    sorted_units = sort_units(units, ' '.join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    # os.remove is portable and avoids spawning an external 'rm' process.
    os.remove(args.xlf)
    print('Removed ' + args.xlf)
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description="Create translation files.")
    parser.add_argument(
        "--author",
        default="Ellen Spertus <*****@*****.**>",
        help="name and email address of contact for translators",
    )
    parser.add_argument("--lang", default="en",
                        help="ISO 639-1 source language code")
    parser.add_argument("--output_dir", default="json",
                        help="relative directory for output files")
    parser.add_argument("--xlf", help="file containing xlf definitions")
    parser.add_argument(
        "--templates",
        default=["template.soy"],
        nargs="+",
        help="relative path to Soy templates, comma or space "
        "separated (used for ordering messages)",
    )
    # args is global so helpers elsewhere in the file can read the options.
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(","):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(" ".join(line.strip() for line in myfile))
    sorted_units = sort_units(units, " ".join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    # os.remove is portable and avoids spawning an external 'rm' process.
    os.remove(args.xlf)
    print("Removed " + args.xlf)
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--xlf', help='file containing xlf definitions')
    parser.add_argument('--templates', default=['template.soy'], nargs='+',
                        help='relative path to Soy templates, comma or space '
                        'separated (used for ordering messages)')
    # args is global so helpers elsewhere in the file can read the options.
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(','):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(' '.join(line.strip() for line in myfile))
    sorted_units = sort_units(units, ' '.join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    # os.remove is portable and avoids spawning an external 'rm' process.
    os.remove(args.xlf)
    print('Removed ' + args.xlf)
def main():
    """Create translation files, merging Open Roberta messages when present.

    Parses the base input file via transform(), merges the optional
    robInput file on top, then writes the JSON output files.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='messages.js',
                        help='input file')
    parser.add_argument('--robInput_file', default='robMessages.js',
                        help='robInput file')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    description = ''
    # Context managers guarantee the handles close even if transform raises.
    with codecs.open(args.input_file, 'r', 'utf-8') as infile:
        transform(results, synonyms, description, infile)

    # Merge the optional Open Roberta message file on top of the base set.
    if os.path.exists(args.robInput_file):
        if not args.quiet:
            # Routine info message, so honor the --quiet flag.
            print('Merged Open Roberta messages')
        with codecs.open(args.robInput_file, 'r', 'utf-8') as infile:
            transform(results, synonyms, description, infile)

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(
            len(synonyms), synonym_file_name))
def main():
    """Parse a _messages.js-style input file and write translation files.

    Collects '///' description comments and message definitions, then
    writes the output files via write_files().
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='.',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='_messages.js',
                        help='input file')
    args = parser.parse_args()

    # Read and parse input file.
    results = []
    description = ''
    # Decode as UTF-8 (matching the sibling parsers) and avoid shadowing
    # the builtin 'file'; the context manager closes the handle even if
    # parsing raises.
    with codecs.open(args.input_file, 'r', 'utf-8') as infile:
        for line in infile:
            if line.startswith('///'):
                # Consecutive '///' lines accumulate into one description.
                if description:
                    description = description + ' ' + line[3:].strip()
                else:
                    description = line[3:].strip()
            else:
                match = _INPUT_PATTERN.match(line)
                if match:
                    result = {}
                    result['meaning'] = match.group(1)
                    result['source'] = match.group(2)
                    if not description:
                        print('Warning: No description for ' + result['meaning'])
                    result['description'] = description
                    description = ''
                    results.append(result)

    # Create output files.
    write_files(args.author, args.lang, args.output_dir, results, False)
def main():
    """Build the imdb-data output filesets: sampled subsets, og, test, dev."""
    # Output dir names.
    output_root = "../imdb-data"
    test_dir_name = "test"
    dev_dir_name = "dev"
    og_dir_name = "og"
    small_dir_prefix = "sd"

    # Input dir names.
    raw_root = "../imdb"
    train_folder = "train"
    test_folder = "test"

    train_path = os.path.join(raw_root, train_folder)
    all_train = load_data(train_path)
    pos_train = all_train['pos']
    neg_train = all_train['neg']

    # Fixed seed keeps the shuffles (and hence all subsets) reproducible.
    random.seed(42)
    random.shuffle(pos_train)
    random.shuffle(neg_train)

    # Write sampled filesets, skipping any that already exist.
    for k in SAMPLE_DS_SIZES:
        sd_out = os.path.join(output_root, small_dir_prefix + str(k))
        if os.path.exists(sd_out):
            continue
        half = int(k / 2)
        subset = sample(pos_train, half) + sample(neg_train, half)
        common.write_files(sd_out, subset)

    # Write og fileset.
    og_out = os.path.join(output_root, og_dir_name)
    if not os.path.exists(og_out):
        common.write_files(og_out, pos_train + neg_train)

    # Write test & dev filesets: the first 400 of each polarity become the
    # dev set, the remainder stay in the test set.
    test_path = os.path.join(raw_root, test_folder)
    test_out = os.path.join(output_root, test_dir_name)
    dev_out = os.path.join(output_root, dev_dir_name)
    all_test = load_data(test_path)
    pos_test = all_test['pos']
    neg_test = all_test['neg']

    NUM_DEV_EXAMPLES = 400
    pos_dev, pos_test = (pos_test[:NUM_DEV_EXAMPLES],
                         pos_test[NUM_DEV_EXAMPLES:])
    neg_dev, neg_test = (neg_test[:NUM_DEV_EXAMPLES],
                         neg_test[NUM_DEV_EXAMPLES:])

    common.write_files(test_out, pos_test + neg_test)
    common.write_files(dev_out, pos_dev + neg_dev)
def write_naive_samples_to_data_file(inputPath, outputPath):
    '''Write the generated naive samples.

    The dataset will be called nwd_xxx, with xxx being the version number
    here. inputPath should probably be equal to outputPath.

    Output:
    1. Writes every training sample into outputPath/ as a .txt file whose
       name follows the id_label_reviewScore format. No reviewScore is
       generated, so that field is a hardcoded 0 -- do not rely on it.
    2. Writes a single METADATA text file into outputPath/ that records
       the sample_stats of this naive dataset.
    '''
    label_fn = NaiveLabelGeneratingFunction()
    versioned_name = "nwd00"
    sample_dir = outputPath + versioned_name

    samples, stats = generate_naive_label(inputPath, label_fn)
    stats_text = format_sample_stats(stats)

    common.write_files(sample_dir, samples)
    metadata_path = outputPath + versioned_name + "_METADATA"
    with open(metadata_path, "w+") as f:
        f.write(stats_text)
def main():
    """Create translation JSON files, splitting out constants and synonyms.

    Messages whose description matches _CONSTANT_DESCRIPTION_PATTERN are
    written to constants.json instead of the translatable message files.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='messages.js',
                        help='input file')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    constants = {}  # Values that are constant across all languages.
    description = ''
    infile = codecs.open(args.input_file, 'r', 'utf-8')
    for line in infile:
        if line.startswith('///'):
            if description:
                description = description + ' ' + line[3:].strip()
            else:
                description = line[3:].strip()
        else:
            match = _INPUT_DEF_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = match.group(2).replace("\\'", "'")
                if not description:
                    # Bug fix: 'result' is not defined at this point in the
                    # iteration (NameError / stale value); use key instead.
                    print('Warning: No description for ' + key)
                if (description and
                        _CONSTANT_DESCRIPTION_PATTERN.search(description)):
                    constants[key] = value
                else:
                    result = {}
                    result['meaning'] = key
                    result['source'] = value
                    result['description'] = description
                    results.append(result)
                description = ''
            else:
                match = _INPUT_SYN_PATTERN.match(line)
                if match:
                    if description:
                        print('Warning: Description preceding definition of '
                              'synonym {0}.'.format(match.group(1)))
                        description = ''
                    synonyms[match.group(1)] = match.group(2)
    infile.close()

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(
            len(synonyms), synonym_file_name))

    # Create constants.json.
    constants_file_name = os.path.join(os.curdir, args.output_dir,
                                       'constants.json')
    with open(constants_file_name, 'w') as outfile:
        json.dump(constants, outfile)
    if not args.quiet:
        # Bug fix: report the constants file, not the synonyms file.
        print("Wrote {0} constant pairs to {1}.".format(
            len(constants), constants_file_name))
def main():
    """Create translation JSON files, splitting out constants and synonyms.

    Messages whose description matches _CONSTANT_DESCRIPTION_PATTERN are
    written to constants.json instead of the translatable message files.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file', default='messages.js',
                        help='input file')
    parser.add_argument('--quiet', action='store_true', default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    constants = {}  # Values that are constant across all languages.
    description = ''
    infile = codecs.open(args.input_file, 'r', 'utf-8')
    for line in infile:
        if line.startswith('///'):
            if description:
                description = description + ' ' + line[3:].strip()
            else:
                description = line[3:].strip()
        else:
            match = _INPUT_DEF_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = match.group(2)
                if not description:
                    # Bug fix: 'result' is not defined at this point in the
                    # iteration (NameError / stale value); use key instead.
                    print('Warning: No description for ' + key)
                if (description and
                        _CONSTANT_DESCRIPTION_PATTERN.search(description)):
                    constants[key] = value
                else:
                    result = {}
                    result['meaning'] = key
                    result['source'] = value
                    result['description'] = description
                    results.append(result)
                description = ''
            else:
                match = _INPUT_SYN_PATTERN.match(line)
                if match:
                    if description:
                        print('Warning: Description preceding definition of '
                              'synonym {0}.'.format(match.group(1)))
                        description = ''
                    synonyms[match.group(1)] = match.group(2)
    infile.close()

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(
            len(synonyms), synonym_file_name))

    # Create constants.json.
    constants_file_name = os.path.join(os.curdir, args.output_dir,
                                       'constants.json')
    with open(constants_file_name, 'w') as outfile:
        json.dump(constants, outfile)
    if not args.quiet:
        # Bug fix: report the constants file, not the synonyms file.
        print("Wrote {0} constant pairs to {1}.".format(
            len(constants), constants_file_name))
result['source'] = value result['description'] = description results.append(result) description = '' else: match = _INPUT_SYN_PATTERN.match(line) if match: if description: print('Warning: Description preceding definition of synonym {0}.'. format(match.group(1))) description = '' synonyms[match.group(1)] = match.group(2) infile.close() # Create <lang_file>.json, keys.json, and qqq.json. write_files(args.author, args.lang, args.output_dir, results, False) # Create synonyms.json. synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json') with open(synonym_file_name, 'w') as outfile: json.dump(synonyms, outfile) if not args.quiet: print("Wrote {0} synonym pairs to {1}.".format( len(synonyms), synonym_file_name)) # Create constants.json constants_file_name = os.path.join( + os.curdir, args.output_dir, 'constants.json') with open(constants_file_name, 'w') as outfile: json.dump(constants, outfile) if not args.quiet: