Example #1
def _edit(args):
    """Edit the mapping directly using an editor."""
    if args:
        abort(USAGE + 'dtags: too many arguments')
    try:
        with TempFile(
            mode='w+t', delete=False, prefix='mapping.', dir=CFG_DIR
        ) as tfile:
            with io.open(MAPPING_FILE, 'rt') as mapping_file:
                tfile.write(EDIT_HELP_COMMENTS + mapping_file.read())
            tfile.flush()
    except (OSError, IOError) as err:
        abort('dtags: failed to edit mapping: {}'.format(err), err.errno)
    else:
        editor = shlex.split(os.environ.get('EDITOR', ''))
        if not editor:
            abort('dtags: undefined environment variable: ' +
                  style.bad('EDITOR'))
        try:
            sp.check_call(editor + [tfile.name])
        except sp.CalledProcessError as err:
            abort('dtags: failed to edit mapping: {}'.format(err))
        else:
            mapping, excluded = parse_mapping(tfile.name)
            save_mapping(mapping)
            rm_files(tfile.name)
            if excluded:
                print('Cleaned the following entries:\n' + excluded + '\n')
            finish('New entries saved successfully')
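
The $EDITOR-launch pattern used above, reduced to a minimal standalone sketch (the open_in_editor name is illustrative, not part of dtags):

import os
import shlex
import subprocess

def open_in_editor(path):
    # Split $EDITOR with shlex so values like 'code --wait' work,
    # then block until the editor process exits.
    editor = shlex.split(os.environ.get('EDITOR', ''))
    if not editor:
        raise RuntimeError('EDITOR is not set')
    subprocess.check_call(editor + [path])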
Example #2
def write(data, path, content=None):
    """Write data to path atomically by staging through a temporary file."""
    try:
        temp_file = TempFile(delete=False)

        ext = os.path.splitext(path)[1].lower()
        if ext == '.csv':
            csv.write(data, temp_file.name)
        else:
            omv.write(data, temp_file.name, content)

        temp_file.close()
        os.replace(temp_file.name, path)
    except Exception as e:
        # winerror 17 (ERROR_NOT_SAME_DEVICE): os.replace cannot move the file
        # across volumes on Windows, so fall back to copying it into place.
        if os.name == 'nt' and getattr(e, 'winerror', None) == 17:
            temp_path = path + '.tmp'
            shutil.copy(temp_file.name, temp_path)
            os.remove(temp_file.name)
            os.replace(temp_path, path)
        else:
            try:
                os.remove(temp_file.name)
            except Exception:
                pass
            raise e
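
For reference, the core atomic-write pattern the function above wraps, as a minimal self-contained sketch (atomic_write and its bytes payload are illustrative, not part of the original codebase):

import os
import tempfile

def atomic_write(path, payload):
    # Stage the payload in a temp file on the same filesystem as the target,
    # then swap it into place; os.replace is atomic within one filesystem.
    tmp = tempfile.NamedTemporaryFile(
        delete=False, dir=os.path.dirname(path) or '.')
    try:
        tmp.write(payload)
        tmp.close()
        os.replace(tmp.name, path)
    except Exception:
        tmp.close()
        os.unlink(tmp.name)
        raise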
Example #3
def translate_files_slurm(args, cmds, expected_output_files):
    conda_env = '/private/home/pipibjc/.conda/envs/fairseq-20190509'
    for cmd in cmds:
        with TempFile('w') as script:
            sh = f"""#!/bin/bash
            source activate {conda_env}
            {cmd}
            """
            print(sh)
            script.write(sh)
            script.flush()
            cmd = f"sbatch --gres=gpu:1 -c {args.cpu + 2} {args.sbatch_args} --time=15:0:0 {script.name}"
            import sys
            print(cmd, file=sys.stderr)
            check_call(cmd, shell=True)

    # wait until all expected output files are finished
    num_finished = 0
    while num_finished < len(expected_output_files):
        num_finished = 0
        for output_file in expected_output_files:
            num_finished += 1 if check_finished(output_file) else 0
        if num_finished < len(expected_output_files):
            print("sleeping for 3m ...")
            time.sleep(3 * 60)
Example #4
 def test_restkey(self):
   with TempFile() as f:
     f.write("テスト1,テスト2\nテスト1,テスト2".encode('utf-8'))
     f.flush()
     loader = CSVLoader(f.name, ['c1'], 'utf-8', restkey='garbage')
     for row in loader:
       self.assertEqual('テスト1', row['c1'])
       self.assertEqual(['テスト2'], row['garbage'])
Example #5
 def test_cp932(self):
     with TempFile() as f:
         f.write("テスト1,テスト2".encode('cp932'))
         f.flush()
         loader = CSVLoader(f.name, None, 'cp932')
         for row in loader:
             self.assertEqual('テスト1', row['c0'])
             self.assertEqual('テスト2', row['c1'])
Example #6
 def test_restval(self):
   with TempFile() as f:
     f.write("テスト1,テスト2\nテスト1,テスト2".encode('utf-8'))
     f.flush()
     loader = CSVLoader(f.name, ['c1', 'c2', 'c3'], 'utf-8', restval='<blank>')
     for row in loader:
       self.assertEqual('テスト1', row['c1'])
       self.assertEqual('テスト2', row['c2'])
       self.assertEqual('<blank>', row['c3'])
Example #7
def class_from_module_repr(cls_name, module_repr):
    path = osp.join(gettempdir(), f'{getuser()}_pyg_jit')
    makedirs(path)
    with TempFile(mode='w+', suffix='.py', delete=False, dir=path) as f:
        f.write(module_repr)
    spec = spec_from_file_location(cls_name, f.name)
    mod = module_from_spec(spec)
    sys.modules[cls_name] = mod
    spec.loader.exec_module(mod)
    return getattr(mod, cls_name)
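
A hedged usage sketch for the helper above, assuming its imports (sys, tempfile, the importlib helpers) are in scope; the Greeter source string is purely illustrative:

src = '''
class Greeter:
    def hello(self):
        return 'hi'
'''
Greeter = class_from_module_repr('Greeter', src)
print(Greeter().hello())  # -> hi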
Example #8
    def test_invalid_param(self):
        with TempFile() as f:
            args = ['--in-format', 'none', f.name]
            self.assertNotEqual(_JubaModelCommand.start(args), 0)

            args = ['--out-format', 'none', f.name]
            self.assertNotEqual(_JubaModelCommand.start(args), 0)

            args = ['--no-such-option']
            self.assertNotEqual(_JubaModelCommand.start(args), 0)
Example #9
 def test_unicode_separator(self):
   with TempFile() as f:
     f.write("v1★v2\ns1★s2\n".encode('utf-8'))
     f.flush()
     loader = CSVLoader(f.name, delimiter='★')
     lines = 0
     for row in loader:
       lines += 1
       self.assertEqual('s1', row['v1'])
       self.assertEqual('s2', row['v2'])
     self.assertEqual(1, lines)
Example #10
 def test_cp932_manual_fieldnames(self):
   with TempFile() as f:
     f.write("テスト1,テスト2\nテスト1,テスト2".encode('cp932'))
     f.flush()
     # assign field names statically
     loader = CSVLoader(f.name, ['列1', '列2'], 'cp932', delimiter=',')
     lines = 0
     for row in loader:
       lines += 1
       self.assertEqual('テスト1', row['列1'])
       self.assertEqual('テスト2', row['列2'])
     self.assertEqual(2, lines)
Example #11
 def test_cp932_seq_fieldnames(self):
   with TempFile() as f:
     f.write("テスト1,テスト2\nテスト1,テスト2".encode('cp932'))
     f.flush()
     # assign sequential field names
     loader = CSVLoader(f.name, False, 'cp932', delimiter=',')
     lines = 0
     for row in loader:
       lines += 1
       self.assertEqual('テスト1', row['c0'])
       self.assertEqual('テスト2', row['c1'])
     self.assertEqual(2, lines)
Example #12
 def test_cp932(self):
   with TempFile() as f:
     f.write("列1,列2\nテスト1,テスト2\n".encode('cp932'))
     f.flush()
     # predict field names from 1st row
     loader = CSVLoader(f.name, None, 'cp932', delimiter=',')
     lines = 0
     for row in loader:
       lines += 1
       self.assertEqual('テスト1', row['列1'])
       self.assertEqual('テスト2', row['列2'])
     self.assertEqual(1, lines)
Example #13
 def test_simple(self):
     with TempFile() as f:
         f.write("k1,\"k2\",k3\n1,2,3\n4,5,6".encode('utf-8'))
         f.flush()
         loader = CSVLoader(f.name)
         for row in loader:
             self.assertEqual(set(['k1', 'k2', 'k3']), set(row.keys()))
             if row['k1'] == '1':
                 self.assertEqual('2', row['k2'])
                 self.assertEqual('3', row['k3'])
             elif row['k1'] == '4':
                 self.assertEqual('5', row['k2'])
                 self.assertEqual('6', row['k3'])
             else:
                 self.fail('unexpected row')
Example #14
 def test_noheader(self):
     with TempFile() as f:
         f.write("1,\"2\",3\n\"4\",5,\"6\"".encode('utf-8'))
         f.flush()
         loader = CSVLoader(f.name, False)
         for row in loader:
             self.assertEqual(set(['c0', 'c1', 'c2']), set(row.keys()))
             if row['c0'] == '1':
                 self.assertEqual('2', row['c1'])
                 self.assertEqual('3', row['c2'])
             elif row['c0'] == '4':
                 self.assertEqual('5', row['c1'])
                 self.assertEqual('6', row['c2'])
             else:
                 self.fail('unexpected row')
Example #15
    def test_simple(self):
        data = 'hello\nworld'
        lines = []

        with TempFile() as f:
            f.write(data.encode())
            f.flush()
            loader = LineBasedFileLoader(f.name)

            for line in loader:
                lines.append(line)

        self.assertEqual([{
            'line': 'hello\n',
            'number': 0
        }, {
            'line': 'world',
            'number': 1
        }], lines)
Example #16
 def test_valid_param(self):
     with TempFile() as f:
         f.write(_get_binary_file().read())
         f.flush()
         args = ['--in-format', 'binary', '--out-format', 'json', f.name]
         self.assertEqual(_JubaModelCommand.start(args), 0)
Example #17
 def tempfile(self, **kwargs):
     """ Create a NamedTemporaryFile in the TempPath. """
     kwargs.pop("dir", None)
     return TempFile(dir=str(self), **kwargs)
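
A hedged usage sketch; tmp_path is assumed to be a TempPath instance whose str() names an existing directory:

# NamedTemporaryFile defaults to 'w+b' mode, hence the bytes payload.
with tmp_path.tempfile(suffix='.log') as f:
    f.write(b'scratch data')  # the file is created inside tmp_path
    print(f.name)             # e.g. <tmp_path>/tmpXXXXXXXX.log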
Example #18
 def __init__(self, fs, path, mode):
     self.file = TempFile()
     self.fs = fs
     self.path = path
     self.mode = mode
Example #19
 def test_guess_header(self):
   with TempFile() as f:
     f.write("k1|k2|k3\n1|2|3".encode())
     f.flush()
     loader = CSVLoader(f.name, fieldnames=True, delimiter='|')
     self.assertEqual([{'k1': '1', 'k2': '2', 'k3': '3'}], list(loader))
Example #20
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data',
                        '-d',
                        required=True,
                        help='Path to file to translate')
    parser.add_argument('--model',
                        '-m',
                        required=True,
                        help='Model checkpoint')
    parser.add_argument('--lenpen',
                        default=1.2,
                        type=float,
                        help='Length penalty')
    parser.add_argument('--beam', default=5, type=int, help='Beam size')
    parser.add_argument('--max-len-a',
                        type=float,
                        default=0,
                        help='max-len-a parameter when back-translating')
    parser.add_argument('--max-len-b',
                        type=int,
                        default=200,
                        help='max-len-b parameter when back-translating')
    parser.add_argument('--cpu',
                        type=int,
                        default=4,
                        help='Number of CPU for interactive.py')
    parser.add_argument(
        '--cuda-visible-device-ids',
        '-gids',
        default=None,
        nargs='*',
        help='List of cuda visible device ids, comma separated')
    parser.add_argument(
        '--dest', help='Output path for the intermediate and translated file')
    parser.add_argument('--max-tokens',
                        type=int,
                        default=12000,
                        help='max tokens')
    parser.add_argument('--buffer-size',
                        type=int,
                        default=10000,
                        help='Buffer size')
    parser.add_argument('--chunks', type=int, default=100)
    parser.add_argument(
        '--source-lang',
        type=str,
        default=None,
        help='Source language. Will be inferred from the model if not set')
    parser.add_argument(
        '--target-lang',
        type=str,
        default=None,
        help='Target language. Will be inferred from the model if not set')
    parser.add_argument(
        '--databin',
        type=str,
        default=None,
        help='Parallel databin. Will combine with the back-translated databin')
    parser.add_argument('--sbatch-args',
                        default='',
                        help='Extra SBATCH arguments')

    parser.add_argument('--backend',
                        type=str,
                        default='local',
                        choices=['local', 'slurm'])
    args = parser.parse_args()

    args.cuda_visible_device_ids = args.cuda_visible_device_ids or list(
        range(torch.cuda.device_count()))

    chkpnt = torch.load(args.model)
    model_args = chkpnt['args']
    if args.source_lang is None or args.target_lang is None:
        args.source_lang = args.source_lang or model_args.source_lang
        args.target_lang = args.target_lang or model_args.target_lang
    if args.databin is None:
        args.databin = model_args.data

    root_dir = os.path.dirname(os.path.realpath(__file__))
    translation_dir = os.path.join(args.dest or root_dir, 'translations',
                                   f'{args.source_lang}-{args.target_lang}')

    tempdir = os.path.join(translation_dir, 'splits')
    os.makedirs(tempdir, exist_ok=True)
    split_files = glob(f'{tempdir}/mono_data*')

    if len(split_files) != args.chunks:
        if len(split_files) != 0:
            print(
                "number of split files does not match chunks; removing files and re-splitting"
            )
            for f in os.listdir(tempdir):
                os.remove(os.path.join(tempdir, f))
        print("splitting files ...")
        check_call(
            f'split -n "r/{args.chunks}" -a3 -d {args.data} {tempdir}/mono_data',
            shell=True)
        split_files = glob(f'{tempdir}/mono_data*')
    else:
        print(
            "number of split files matches the specified chunks; skipping split"
        )

    translated_files = []
    files_to_translate = []
    for file in split_files:
        # skip the translation job if it's finished
        output_file = get_output_file(translation_dir, file)
        translated_files.append(output_file)
        if check_finished(output_file):
            print(f"{output_file} is translated")
            continue
        files_to_translate.append(file)

    print(f"{len(files_to_translate)} files to translate")

    translate_files(args, translation_dir, files_to_translate)

    # aggregate translated files
    generated_src = f'{args.dest}/generated.src'
    generated_tgt = f'{args.dest}/generated.hypo'
    if (count_line(generated_src) != count_line(generated_tgt)
            or count_line(generated_src) <= 0):
        print(f"aggregating translated {len(translated_files)} files")
        with TempFile() as fout:
            files = " ".join(translated_files)
            check_call(f"cat {files}", shell=True, stdout=fout)
            # keep source (S-) and hypothesis (H-) lines to form sentence pairs
            check_call(
                f'cat {fout.name} | grep "^S" | cut -f2 > {generated_src}',
                shell=True)
            check_call(
                f'cat {fout.name} | grep "^H" | cut -f3 > {generated_tgt}',
                shell=True)
    assert count_line(generated_src) == count_line(generated_tgt)
    print(f"output generated files to {generated_src}, {generated_tgt}")
Example #21
def _screen_shot(self):
    from tempfile import NamedTemporaryFile as TempFile
    # NamedTemporaryFile is delete-on-close; save_screenshot writes the PNG to
    # its path and the bytes are read back through the still-open handle (POSIX).
    tmp = TempFile(suffix='.png')
    self.save_screenshot(tmp.name)
    return tmp.read()
Example #22
 def tempfile(self, filename=None, **kwargs):
     """ Instantiate a NamedTemporaryFile or use an existing file in the TempPath and return it as a Path object. """
     return Path(TempFile(dir=str(self), **kwargs).name if filename is None else self.joinpath(filename))
Example #23
class SearchSchemaParser(object):
  """Search schema parser.

  Parses POI-file and generates: a string containing the SQL statements required
  to generate and populate postgres tables, a string for querying.
  """
  EVENTS = ("start", "end")

  SEARCH_TABLE_SCHEMA_TAG = "SearchTableSchema"
  SEARCH_TABLE_VALUES_TAG = "SearchTableValues"
  BALLOON_STYLE_TAG = "BalloonStyle"
  FIELD_TAG = "field"
  R_TAG = "r"
  LAT_TAG = "lat"
  LON_TAG = "lon"
  SEARCH_FILE_NAME = "SearchDataFile"

  # Types that need to be UTF-encoded when writing to postgres database.
  ENCODE_TYPES = ["varchar", "character varying", "character", "text"]

  def __init__(self, db_updater):
    """Inits search schema parser.

    Args:
      db_updater: database updater.
    """
    self._db_updater = db_updater
    self._current_tag = None
    self._within_record = False
    self._within_style = False

  def Parse(self, search_file, table_name, file_prefix=None):
    """Parser entry point.

    Parses the given POI file to POI elements, based on POI elements builds
    SQL statements for creating and populating POI table in POI database and
    triggers the DB updater to implement these SQL statements.

    Args:
      search_file: string containing absolute path to .poi file to be parsed.
      table_name: string containing name to use for POI table creation.
      file_prefix: optional string file prefix.
    Returns:
      num_fields: number of fields in search schema.
      sql_search: string containing SQL statement to execute for POI query.
      balloon_style: string containing associated balloon style.

    Raises:
      exceptions.SearchSchemaParserException exception.
      psycopg2.Warning/Error exceptions.
    """
    self._table_name = table_name
    self._file_prefix = file_prefix
    logger.info("Ingesting POI file %s into parser...", search_file)
    if file_prefix is None:
      logger.info("File prefix is None")
    else:
      logger.info("File prefix is '%s'", file_prefix)
    self.__StartDocument()
    try:
      context = ET.iterparse(search_file, SearchSchemaParser.EVENTS)
    except ET.ParseError as e:
      row, column = e.position
      raise exceptions.SearchSchemaParserException(
          "Unable to parse POI file %s."
          " A parsing error on row %d column %d: %s" % (
              search_file, row, column, e))

    logger.info("Ingesting POI file %s into parser done.", search_file)
    logger.info("Parsing POI elements and inserting into POI database...")
    # Spooled temp file used as a buffer for records for the db COPY command
    self.tmp_file = TempFile(max_size=_K_SPOOL_SIZE, suffix=table_name)
    num_elements = 0
    self._element_start = self.__StartElementHeader
    self._element_end = self.__EndElementHeader
    for event, elem in context:
      if event == "start":
        self.__StartElement(elem)
      elif event == "end":
        self.__EndElement(elem)
        num_elements += 1
        elem.clear()

    self.__EndDocument()
    logger.info("Total POI elements: %s.", num_elements)
    logger.info("Parsing POI elements and inserting into POI database done.")
    return (self._num_fields, self._sql_search, self._balloon_style)
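
A hedged invocation sketch for Parse; PoiDbUpdater and the .poi path are hypothetical stand-ins for whatever the surrounding codebase provides:

# Illustrative only: parse a POI file and ingest it into the POI database.
parser = SearchSchemaParser(db_updater=PoiDbUpdater())
num_fields, sql_search, balloon_style = parser.Parse(
    '/gevol/assets/poi/roads.poi', table_name='poi_roads')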
Example #24
        def write_csv(content):
            file_ = TempFile(mode="w+", suffix=".csv")
            file_.write(content)
            file_.seek(0)

            return file_
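
The returned handle is rewound before being handed back, so callers can read the CSV immediately; a brief hedged usage note (delete-on-close is the NamedTemporaryFile default assumed here):

f = write_csv('a,b\n1,2\n')
print(f.read())  # prints the CSV content
f.close()        # the temporary file is removed on close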