Example #1
def main():
  # Set up argument parser.
  parser = argparse.ArgumentParser(description='Create translation files.')
  parser.add_argument(
      '--author',
      default='Ellen Spertus <*****@*****.**>',
      help='name and email address of contact for translators')
  parser.add_argument('--lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--output_dir', default='json/',
                      help='relative directory for output files')
  parser.add_argument('--input_file', default='messages.js',
                      help='input file')
  args = parser.parse_args()

  # Read and parse input file.
  results = []
  synonyms = {}
  description = ''
  infile = codecs.open(args.input_file, 'r', 'utf-8')
  for line in infile:
    if line.startswith('///'):
      if description:
        description = description + ' ' + line[3:].strip()
      else:
        description = line[3:].strip()
    else:
      match = _INPUT_DEF_PATTERN.match(line)
      if match:
        result = {}
        result['meaning'] = match.group(1)
        result['source'] = match.group(2)
        if not description:
          print('Warning: No description for ' + result['meaning'])
        result['description'] = description
        description = ''
        results.append(result)
      else:
        match = _INPUT_SYN_PATTERN.match(line)
        if match:
          if description:
            print('Warning: Description preceding definition of synonym {0}.'.
                  format(match.group(1)))
            description = ''
          synonyms[match.group(1)] = match.group(2)
  infile.close()

  # Create <lang_file>.json, keys.json, and qqq.json.
  write_files(args.author, args.lang, args.output_dir, results, False)

  # Create synonyms.json.
  synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json')
  with open(synonym_file_name, 'w') as outfile:
    json.dump(synonyms, outfile)
  print("Wrote {0} synonym pairs to {1}.".format(
      len(synonyms), synonym_file_name))
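These main() snippets reference module-level names defined elsewhere in the script (the imports, the input regexes, and the write_files helper). A minimal sketch of that scaffolding, assuming Blockly-style input lines of the form Blockly.Msg.KEY = '...'; the exact regexes in the real script may differ:

import argparse
import codecs
import json
import os
import re

# Assumed patterns, inferred from the input format; not copied from the real script.
# Matches definitions such as:  Blockly.Msg.SOME_KEY = 'Some text';
_INPUT_DEF_PATTERN = re.compile(r"Blockly\.Msg\.(\w+)\s*=\s*'(.*)'\s*;?\s*$")
# Matches synonyms such as:     Blockly.Msg.ALIAS = Blockly.Msg.SOME_KEY;
_INPUT_SYN_PATTERN = re.compile(r"Blockly\.Msg\.(\w+)\s*=\s*Blockly\.Msg\.(\w+)\s*;?\s*$")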
Example #2
def main():
    # Set up argument parser.
    parser = argparse.ArgumentParser(description="Create translation files.")
    parser.add_argument(
        "--author",
        default="Ellen Spertus <*****@*****.**>",
        help="name and email address of contact for translators",
    )
    parser.add_argument("--lang", default="en", help="ISO 639-1 source language code")
    parser.add_argument("--output_dir", default="json", help="relative directory for output files")
    parser.add_argument("--input_file", default="messages.js", help="input file")
    parser.add_argument("--quiet", action="store_true", default=False, help="only display warnings, not routine info")
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    description = ""
    infile = codecs.open(args.input_file, "r", "utf-8")
    for line in infile:
        if line.startswith("///"):
            if description:
                description = description + " " + line[3:].strip()
            else:
                description = line[3:].strip()
        else:
            match = _INPUT_DEF_PATTERN.match(line)
            if match:
                result = {}
                result["meaning"] = match.group(1)
                result["source"] = match.group(2)
                if not description:
                    print("Warning: No description for " + result["meaning"])
                result["description"] = description
                description = ""
                results.append(result)
            else:
                match = _INPUT_SYN_PATTERN.match(line)
                if match:
                    if description:
                        print("Warning: Description preceding definition of synonym {0}.".format(match.group(1)))
                        description = ""
                    synonyms[match.group(1)] = match.group(2)
    infile.close()

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir, "synonyms.json")
    with open(synonym_file_name, "w") as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(len(synonyms), synonym_file_name))
Example #3
def main():
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang',
                        default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir',
                        default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file',
                        default='messages.js',
                        help='input file')
    parser.add_argument('--robInput_file',
                        default='robMessages.js',
                        help='robInput file')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    description = ''
    infile = codecs.open(args.input_file, 'r', 'utf-8')
    transform(results, synonyms, description, infile)
    infile.close()
    if os.path.exists(args.robInput_file):
        print('Merged Open Roberta messages')
        infile = codecs.open(args.robInput_file, 'r', 'utf-8')
        transform(results, synonyms, description, infile)
        infile.close()
        with open(args.robInput_file, 'r') as fin:
            print(fin.read())

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(len(synonyms),
                                                       synonym_file_name))
Example #4
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang',
                        default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir',
                        default='json',
                        help='relative directory for output files')
    parser.add_argument('--xlf', help='file containing xlf definitions')
    parser.add_argument('--templates',
                        default=['template.soy'],
                        nargs='+',
                        help='relative path to Soy templates, comma or space '
                        'separated (used for ordering messages)')
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(','):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(' '.join(line.strip() for line in myfile))
    sorted_units = sort_units(units, ' '.join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    command = ['rm', args.xlf]
    subprocess.check_call(command)
    print('Removed ' + args.xlf)
Example #5
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description="Create translation files.")
    parser.add_argument(
        "--author",
        default="Ellen Spertus <*****@*****.**>",
        help="name and email address of contact for translators",
    )
    parser.add_argument("--lang", default="en", help="ISO 639-1 source language code")
    parser.add_argument("--output_dir", default="json", help="relative directory for output files")
    parser.add_argument("--xlf", help="file containing xlf definitions")
    parser.add_argument(
        "--templates",
        default=["template.soy"],
        nargs="+",
        help="relative path to Soy templates, comma or space " "separated (used for ordering messages)",
    )
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(","):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(" ".join(line.strip() for line in myfile))
    sorted_units = sort_units(units, " ".join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    command = ["rm", args.xlf]
    subprocess.check_call(command)
    print ("Removed " + args.xlf)
Example #6
def main():
    """Parses arguments and processes the specified file.

    Raises:
        IOError: An I/O error occurred with an input or output file.
        InputError: Input files lacked required fields.
    """
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang', default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir', default='json',
                        help='relative directory for output files')
    parser.add_argument('--xlf', help='file containing xlf definitions')
    parser.add_argument('--templates', default=['template.soy'], nargs='+',
                        help='relative path to Soy templates, comma or space '
                        'separated (used for ordering messages)')
    global args
    args = parser.parse_args()

    # Make sure output_dir ends with slash.
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Process the input file, and sort the entries.
    units = _process_file(args.xlf)
    files = []
    for arg in args.templates:
        for filename in arg.split(','):
            filename = filename.strip()
            if filename:
                with open(filename) as myfile:
                    files.append(' '.join(line.strip() for line in myfile))
    sorted_units = sort_units(units, ' '.join(files))

    # Write the output files.
    write_files(args.author, args.lang, args.output_dir, sorted_units, True)

    # Delete the input .xlf file.
    command = ['rm', args.xlf]
    subprocess.check_call(command)
    print('Removed ' + args.xlf)
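Examples #4 through #6 delete the input .xlf file by shelling out to rm, which ties the script to Unix-like systems. A portable alternative using only the standard library (a sketch of an option, not what the original script does):

# Equivalent to subprocess.check_call(['rm', args.xlf]) for a single file, but works on Windows too.
os.remove(args.xlf)
print('Removed ' + args.xlf)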
Example #7
def main():
  # Set up argument parser.
  parser = argparse.ArgumentParser(description='Create translation files.')
  parser.add_argument(
      '--author',
      default='Ellen Spertus <*****@*****.**>',
      help='name and email address of contact for translators')
  parser.add_argument('--lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--output_dir', default='json',
                      help='relative directory for output files')
  parser.add_argument('--input_file', default='messages.js',
                      help='input file')
  parser.add_argument('--robInput_file', default='robMessages.js',
                      help='robInput file')
  parser.add_argument('--quiet', action='store_true', default=False,
                      help='only display warnings, not routine info')
  args = parser.parse_args()
  if not args.output_dir.endswith(os.path.sep):
    args.output_dir += os.path.sep

  # Read and parse input file.
  results = []
  synonyms = {}
  description = ''
  infile = codecs.open(args.input_file, 'r', 'utf-8')
  transform(results, synonyms, description, infile)
  infile.close()
  if os.path.exists(args.robInput_file):
    print('Merged Open Roberta messages')
    infile = codecs.open(args.robInput_file, 'r', 'utf-8')
    transform(results, synonyms, description, infile)
    infile.close()

  # Create <lang_file>.json, keys.json, and qqq.json.
  write_files(args.author, args.lang, args.output_dir, results, False)

  # Create synonyms.json.
  synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json')
  with open(synonym_file_name, 'w') as outfile:
    json.dump(synonyms, outfile)
  if not args.quiet:
    print("Wrote {0} synonym pairs to {1}.".format(
        len(synonyms), synonym_file_name))
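Examples #3 and #7 move the parsing loop of Example #1 into a transform helper whose definition is not shown. A hypothetical reconstruction, assuming it mirrors that inline loop; note that the description argument cannot be updated in place (strings are immutable), so only results and synonyms carry state back to the caller:

def transform(results, synonyms, description, infile):
  # Hypothetical sketch of the helper called above: accumulates '///' comment
  # lines into a description, then attaches it to the next message definition.
  for line in infile:
    if line.startswith('///'):
      text = line[3:].strip()
      description = description + ' ' + text if description else text
    else:
      match = _INPUT_DEF_PATTERN.match(line)
      if match:
        if not description:
          print('Warning: No description for ' + match.group(1))
        results.append({'meaning': match.group(1),
                        'source': match.group(2),
                        'description': description})
        description = ''
      else:
        match = _INPUT_SYN_PATTERN.match(line)
        if match:
          if description:
            print('Warning: Description preceding definition of synonym {0}.'
                  .format(match.group(1)))
            description = ''
          synonyms[match.group(1)] = match.group(2)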
Example #8
def main():
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang',
                        default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir',
                        default='.',
                        help='relative directory for output files')
    parser.add_argument('--input_file',
                        default='_messages.js',
                        help='input file')
    args = parser.parse_args()

    # Read and parse input file.
    results = []
    description = ''
    file = open(args.input_file)
    for line in file:
        if line.startswith('///'):
            if description:
                description = description + ' ' + line[3:].strip()
            else:
                description = line[3:].strip()
        else:
            match = _INPUT_PATTERN.match(line)
            if match:
                result = {}
                result['meaning'] = match.group(1)
                result['source'] = match.group(2)
                if not description:
                    print('Warning: No description for ' + result['meaning'])
                result['description'] = description
                description = ''
                results.append(result)
    file.close()

    # Create output files.
    write_files(args.author, args.lang, args.output_dir, results, False)
Example #9
def main():
    # Output dir names
    outputDataDir = "../imdb-data"
    testDataDir = "test"
    devDataDir = "dev"
    ogDataDir = "og"
    smallDataDir = "sd"
    # Input dir names
    rawDataDir = "../imdb"
    trainFolder = "train"
    testFolder = "test"

    trainingDataPath = os.path.join(rawDataDir, trainFolder)
    allTrainingData = load_data(trainingDataPath)
    posTrainingData = allTrainingData['pos']
    negTrainingData = allTrainingData['neg']
    random.seed(42)
    random.shuffle(posTrainingData)
    random.shuffle(negTrainingData)

    # write sampled filesets
    for k in SAMPLE_DS_SIZES:
        sdOutputDir = os.path.join(outputDataDir, smallDataDir + str(k))
        if os.path.exists(sdOutputDir):
            continue
        subset = sample(posTrainingData, int(k / 2)) + sample(
            negTrainingData, int(k / 2))
        common.write_files(sdOutputDir, subset)

    # write og fileset
    ogOutputDir = os.path.join(outputDataDir, ogDataDir)
    if not os.path.exists(ogOutputDir):
        common.write_files(ogOutputDir, posTrainingData + negTrainingData)

    # write test & dev filesets
    testDataPath = os.path.join(rawDataDir, testFolder)
    testOutputDir = os.path.join(outputDataDir, testDataDir)
    devOutputDir = os.path.join(outputDataDir, devDataDir)
    #if not os.path.exists(testOutputDir):
    allTestData = load_data(testDataPath)
    posTestData = allTestData['pos']
    negTestData = allTestData['neg']

    NUM_DEV_EXAMPLES = 400
    posDevData = posTestData[:NUM_DEV_EXAMPLES]
    posTestData = posTestData[NUM_DEV_EXAMPLES:]
    negDevData = negTestData[:NUM_DEV_EXAMPLES]
    negTestData = negTestData[NUM_DEV_EXAMPLES:]

    common.write_files(testOutputDir, posTestData + negTestData)
    common.write_files(devOutputDir, posDevData + negDevData)
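Example #9 leans on helpers defined elsewhere: load_data, common.write_files, SAMPLE_DS_SIZES, and sample (presumably random.sample). A rough stub showing the shape of load_data that the call sites imply, i.e. a dict with 'pos' and 'neg' lists; the real implementation and the items it returns may differ:

def load_data(path):
    # Hypothetical stub: assumes the usual IMDB layout path/pos/*.txt and path/neg/*.txt
    # and returns {'pos': [...], 'neg': [...]} with one entry per review file.
    data = {}
    for label in ('pos', 'neg'):
        label_dir = os.path.join(path, label)
        data[label] = [os.path.join(label_dir, name)
                       for name in sorted(os.listdir(label_dir))]
    return data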
Example #10
def main():
  # Set up argument parser.
  parser = argparse.ArgumentParser(description='Create translation files.')
  parser.add_argument(
      '--author',
      default='Ellen Spertus <*****@*****.**>',
      help='name and email address of contact for translators')
  parser.add_argument('--lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--output_dir', default='.',
                      help='relative directory for output files')
  parser.add_argument('--input_file', default='_messages.js',
                      help='input file')
  args = parser.parse_args()

  # Read and parse input file.
  results = []
  description = ''
  file = open(args.input_file)
  for line in file:
    if line.startswith('///'):
      if description:
        description = description + ' ' + line[3:].strip()
      else:
        description = line[3:].strip()
    else:
      match = _INPUT_PATTERN.match(line)
      if match:
        result = {}
        result['meaning'] = match.group(1)
        result['source'] = match.group(2)
        if not description:
          print('Warning: No description for ' + result['meaning'])
        result['description'] = description
        description = ''
        results.append(result)
  file.close()

  # Create output files.
  write_files(args.author, args.lang, args.output_dir, results, False)
Example #11
def write_naive_samples_to_data_file(inputPath, outputPath):
  '''
  Write the generated naive samples.
  
  The dataset will be called nwd_xxx, with xxx being the version number here.

  inputPath should probably be equal to outputPath.

  Output:
    1. Writes every training sample under the outputPath/ directory, each sample as a
    .txt file whose filename follows the id_label_reviewScore format. Since we don't
    generate a reviewScore, that field is hardcoded to 0; please don't use it.
    2. Writes a single METADATA text file under the outputPath/ directory that records
    the sample_stats of this naive dataset.
  '''
  lgf = NaiveLabelGeneratingFunction()
  directoryNameWithVersion = "nwd00"
  outputDir = outputPath + directoryNameWithVersion

  naive_samples, sample_stats = generate_naive_label(inputPath, lgf)
  formatted_sample_stats = format_sample_stats(sample_stats)
  common.write_files(outputDir, naive_samples)
  with open(outputPath + directoryNameWithVersion + "_METADATA", "w+") as f:
    f.write(formatted_sample_stats)
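A hypothetical call, with placeholder paths rather than values from the original project:

# Writes the naive dataset to ../imdb-data/nwd00 and a ../imdb-data/nwd00_METADATA stats file.
write_naive_samples_to_data_file('../imdb-data/', '../imdb-data/')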
Example #12
def main():
    # Set up argument parser.
    parser = argparse.ArgumentParser(description='Create translation files.')
    parser.add_argument(
        '--author',
        default='Ellen Spertus <*****@*****.**>',
        help='name and email address of contact for translators')
    parser.add_argument('--lang',
                        default='en',
                        help='ISO 639-1 source language code')
    parser.add_argument('--output_dir',
                        default='json',
                        help='relative directory for output files')
    parser.add_argument('--input_file',
                        default='messages.js',
                        help='input file')
    parser.add_argument('--quiet',
                        action='store_true',
                        default=False,
                        help='only display warnings, not routine info')
    args = parser.parse_args()
    if not args.output_dir.endswith(os.path.sep):
        args.output_dir += os.path.sep

    # Read and parse input file.
    results = []
    synonyms = {}
    constants = {}  # Values that are constant across all languages.
    description = ''
    infile = codecs.open(args.input_file, 'r', 'utf-8')
    for line in infile:
        if line.startswith('///'):
            if description:
                description = description + ' ' + line[3:].strip()
            else:
                description = line[3:].strip()
        else:
            match = _INPUT_DEF_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = match.group(2).replace("\\'", "'")
                if not description:
                    print('Warning: No description for ' + key)
                if (description
                        and _CONSTANT_DESCRIPTION_PATTERN.search(description)):
                    constants[key] = value
                else:
                    result = {}
                    result['meaning'] = key
                    result['source'] = value
                    result['description'] = description
                    results.append(result)
                description = ''
            else:
                match = _INPUT_SYN_PATTERN.match(line)
                if match:
                    if description:
                        print(
                            'Warning: Description preceding definition of synonym {0}.'
                            .format(match.group(1)))
                        description = ''
                    synonyms[match.group(1)] = match.group(2)
    infile.close()

    # Create <lang_file>.json, keys.json, and qqq.json.
    write_files(args.author, args.lang, args.output_dir, results, False)

    # Create synonyms.json.
    synonym_file_name = os.path.join(os.curdir, args.output_dir,
                                     'synonyms.json')
    with open(synonym_file_name, 'w') as outfile:
        json.dump(synonyms, outfile)
    if not args.quiet:
        print("Wrote {0} synonym pairs to {1}.".format(len(synonyms),
                                                       synonym_file_name))

    # Create constants.json
    constants_file_name = os.path.join(os.curdir, args.output_dir,
                                       'constants.json')
    with open(constants_file_name, 'w') as outfile:
        json.dump(constants, outfile)
    if not args.quiet:
        print("Wrote {0} constant pairs to {1}.".format(
            len(constants), synonym_file_name))
Example #13
def main():
  # Set up argument parser.
  parser = argparse.ArgumentParser(description='Create translation files.')
  parser.add_argument(
      '--author',
      default='Ellen Spertus <*****@*****.**>',
      help='name and email address of contact for translators')
  parser.add_argument('--lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--output_dir', default='json',
                      help='relative directory for output files')
  parser.add_argument('--input_file', default='messages.js',
                      help='input file')
  parser.add_argument('--quiet', action='store_true', default=False,
                      help='only display warnings, not routine info')
  args = parser.parse_args()
  if not args.output_dir.endswith(os.path.sep):
    args.output_dir += os.path.sep

  # Read and parse input file.
  results = []
  synonyms = {}
  constants = {}  # Values that are constant across all languages.
  description = ''
  infile = codecs.open(args.input_file, 'r', 'utf-8')
  for line in infile:
    if line.startswith('///'):
      if description:
        description = description + ' ' + line[3:].strip()
      else:
        description = line[3:].strip()
    else:
      match = _INPUT_DEF_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = match.group(2)
        if not description:
          print('Warning: No description for ' + key)
        if description and _CONSTANT_DESCRIPTION_PATTERN.search(description):
          constants[key] = value
        else:
          result = {}
          result['meaning'] = key
          result['source'] = value
          result['description'] = description
          results.append(result)
        description = ''
      else:
        match = _INPUT_SYN_PATTERN.match(line)
        if match:
          if description:
            print('Warning: Description preceding definition of synonym {0}.'.
                  format(match.group(1)))
            description = ''
          synonyms[match.group(1)] = match.group(2)
  infile.close()

  # Create <lang_file>.json, keys.json, and qqq.json.
  write_files(args.author, args.lang, args.output_dir, results, False)

  # Create synonyms.json.
  synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json')
  with open(synonym_file_name, 'w') as outfile:
    json.dump(synonyms, outfile)
  if not args.quiet:
    print("Wrote {0} synonym pairs to {1}.".format(
        len(synonyms), synonym_file_name))

  # Create constants.json
  constants_file_name = os.path.join(os.curdir, args.output_dir, 'constants.json')
  with open(constants_file_name, 'w') as outfile:
    json.dump(constants, outfile)
  if not args.quiet:
    print("Wrote {0} constant pairs to {1}.".format(
        len(constants), synonym_file_name))
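Examples #12 and #13 send a definition to constants.json whenever _CONSTANT_DESCRIPTION_PATTERN matches its description; the pattern itself is not shown. A hypothetical definition, assuming constants are flagged with a marker such as {{Notranslate}} in the description (the real marker may differ):

# Hypothetical: descriptions containing '{{Notranslate}}' mark values that are the same in every language.
_CONSTANT_DESCRIPTION_PATTERN = re.compile(r'\{\{Notranslate\}\}', re.IGNORECASE)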
Example #14
          result['source'] = value
          result['description'] = description
          results.append(result)
        description = ''
      else:
        match = _INPUT_SYN_PATTERN.match(line)
        if match:
          if description:
            print('Warning: Description preceding definition of synonym {0}.'.
                  format(match.group(1)))
            description = ''
          synonyms[match.group(1)] = match.group(2)
  infile.close()

  # Create <lang_file>.json, keys.json, and qqq.json.
  write_files(args.author, args.lang, args.output_dir, results, False)

  # Create synonyms.json.
  synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json')
  with open(synonym_file_name, 'w') as outfile:
    json.dump(synonyms, outfile)
  if not args.quiet:
    print("Wrote {0} synonym pairs to {1}.".format(
        len(synonyms), synonym_file_name))

  # Create constants.json
  constants_file_name = os.path.join(
      os.curdir, args.output_dir, 'constants.json')
  with open(constants_file_name, 'w') as outfile:
    json.dump(constants, outfile)
  if not args.quiet: