Example #1
File: send.py Project: AMDmi3/gem5
import os
import sys
from filecmp import cmp as filecmp
from os.path import isdir, isfile, islink, join as joinpath, normpath
from shutil import copy


def syncdir(srcdir, destdir):
    srcdir = normpath(srcdir)
    destdir = normpath(destdir)
    if not isdir(destdir):
        sys.exit('destination directory "%s" does not exist' % destdir)

    for root, dirs, files in os.walk(srcdir):
        root = normpath(root)
        # strip the source prefix so the path can be re-rooted under destdir
        prefix = os.path.commonprefix([root, srcdir])
        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]
        # prune hidden directories and SCCS metadata from the walk
        for rem in [d for d in dirs if d.startswith('.') or d == 'SCCS']:
            dirs.remove(rem)

        for entry in dirs:
            newdir = joinpath(destdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)
                print('mkdir', newdir)

        # append '/.' to symlinked directories so os.walk descends into them
        for i, d in enumerate(dirs):
            if islink(joinpath(srcdir, root, d)):
                dirs[i] = joinpath(d, '.')

        for entry in files:
            dest = normpath(joinpath(destdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            # copy when the destination is missing or filecmp reports a change
            if not isfile(dest) or not filecmp(src, dest):
                print('copy %s %s' % (src, dest))
                copy(src, dest)
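The copy test above relies on `filecmp.cmp`'s default shallow mode: two files whose `os.stat()` signatures (type, size, mtime) match are treated as equal without reading their contents. A minimal sketch of the difference, using hypothetical scratch files:

import filecmp
import os
import shutil
import tempfile

# hypothetical scratch files, only to illustrate shallow vs. deep comparison
tmp = tempfile.mkdtemp()
a, b = os.path.join(tmp, 'a'), os.path.join(tmp, 'b')
with open(a, 'w') as fh:
    fh.write('same size.')
with open(b, 'w') as fh:
    fh.write('SAME SIZE!')
shutil.copystat(a, b)  # give b the same size/mtime signature as a

print(filecmp.cmp(a, b))                 # True: stat signatures match
print(filecmp.cmp(a, b, shallow=False))  # False: the bytes differ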
Example #2
from filecmp import cmp as filecmp
from os.path import abspath


def _binarycmp(filelist, onerror):
    # filelist holds exactly two entries, each exposing a .path attribute
    file0, file1 = filelist

    try:
        # shallow=False forces a byte-for-byte comparison of the contents
        if filecmp(file0.path, file1.path, shallow=False):
            dupdict = {True: filelist}
        else:
            dupdict = {}
        errlist = []

    except OSError as exc:
        # an unreadable file is reported via the caller's error hook
        if onerror is not None:
            onerror(exc, abspath(exc.filename))
        dupdict = {}
        errlist = filelist

    return dupdict, errlist
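A sketch of how this helper might be invoked, assuming a minimal stand-in for the caller's file-entry type (the real type is not shown in this example):

from collections import namedtuple

# hypothetical stand-in: the helper only needs a .path attribute
Entry = namedtuple('Entry', 'path')

def report(exc, path):
    print('skipped %s: %s' % (path, exc))

# compares exactly two candidate files byte for byte
dupdict, errlist = _binarycmp([Entry('a.bin'), Entry('b.bin')], report)
if True in dupdict:
    print('duplicates:', [entry.path for entry in dupdict[True]])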
Example #3
import os
from os.path import exists, isdir, isfile, join as joinpath, normpath


def copyfiles(srcdir, dstdir):
    from filecmp import cmp as filecmp
    from shutil import copyfile

    srcdir = normpath(srcdir)
    dstdir = normpath(dstdir)

    if not isdir(dstdir):
        os.mkdir(dstdir)

    for root, dirs, files in os.walk(srcdir):
        root = normpath(root)
        # strip the source prefix so the path can be re-rooted under dstdir
        prefix = os.path.commonprefix([root, srcdir])

        root = root[len(prefix):]
        if root.startswith('/'):
            root = root[1:]

        for entry in dirs:
            newdir = joinpath(dstdir, root, entry)
            if not isdir(newdir):
                os.mkdir(newdir)

        for entry in files:
            dest = normpath(joinpath(dstdir, root, entry))
            src = normpath(joinpath(srcdir, root, entry))
            # copy when the destination is missing or its contents changed
            if not isfile(dest) or not filecmp(src, dest):
                copyfile(src, dest)

    # some of the SPEC benchmarks expect to be run from one directory up;
    # create self-referential symlinks so 'input/' and 'output/' resolve here
    inlink = joinpath(dstdir, 'input')
    outlink = joinpath(dstdir, 'output')
    if not exists(inlink):
        os.symlink('.', inlink)
    if not exists(outlink):
        os.symlink('.', outlink)
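The two trailing symlinks point back at the directory itself, so `dstdir/input/foo` and `dstdir/output/foo` resolve to `dstdir/foo`; benchmarks that prefix their paths with `input/` or `output/` then work unchanged. A quick check of that behavior on a POSIX filesystem (hypothetical paths):

import os

os.makedirs('run', exist_ok=True)
open('run/data.txt', 'w').close()
if not os.path.exists('run/input'):
    os.symlink('.', 'run/input')  # self-referential link, as in copyfiles()

# both names reach the same file through the link
assert os.path.realpath('run/input/data.txt') == os.path.realpath('run/data.txt')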
Example #4
# Relies on helpers defined elsewhere in this example's project
# (OptionHelper, run_cmd, log, makedirs, get_output_fname, PhrasalTerms)
# and, presumably, 'from filecmp import cmp as filecmp'.
def main():

    opt_h = OptionHelper()

    if opt_h.cascade():
        input_type = opt_h.input_type()

        if opt_h.output() is None:
            opt_h.set_output('out')
        opt_h.check_output()
        cascade_dir = '%s/cascade00' % opt_h.output()
        makedirs(cascade_dir)
        results_fh = open('%s/results' % opt_h.output(), 'w')
        cascade_train_out = '%s/train-out' % cascade_dir
        cascade_test_out = '%s/test-out' % cascade_dir

        basic_cmd = opt_h.basic_cmd()
        output_file_type = ' -outputType UNDERSCORE4CCL'

        log('running initial chunking')
        run_cmd(basic_cmd
                + opt_h.starter_train()
                + opt_h.starter_train_out()
                + output_file_type
                + ' -output ' + cascade_train_out,
                verbose=opt_h.verbose())

        run_cmd(basic_cmd
                + opt_h.starter_train()
                + opt_h.starter_test()
                + opt_h.filter_flag()
                + output_file_type
                + ' -output ' + cascade_test_out,
                verbose=opt_h.verbose())

        cascade_iter = 1

        new_cascade_train_out_fname = get_output_fname(cascade_train_out)
        cascade_expand_last = None
        while True:

            # convert test output to trees
            cascade_test_out_fname = get_output_fname(cascade_test_out)
            cascade_expand = []
            log('building corpus record from ' + cascade_test_out)
            for s_ind, sentence in enumerate(open(cascade_test_out_fname)):
                i = 0
                sentence_str = []
                for chunk in sentence.split():
                    chunk = chunk.split('_')
                    chunk_str = []
                    for word in chunk:
                        if word.startswith('=') and len(word) > 1:
                            chunk_str.append(cascade_expand_last[s_ind][i])
                        else:
                            chunk_str.append(word)

                        i += 1

                    if len(chunk) == 1:
                        sentence_str.append(chunk_str[0])

                    else:
                        sentence_str.append('(' + (' '.join(chunk_str)) + ')')

                cascade_expand.append(sentence_str)

            cascade_test_eval_fname = cascade_dir + '/test-eval'
            eval_fh = open(cascade_test_eval_fname, 'w')
            for sent in cascade_expand:
                print('(' + (' '.join(sent)).replace(' ;', '') + ')',
                      file=eval_fh)
            eval_fh.close()

            # evaluate test output as trees

            run_cmd(opt_h.eval_cmd()
                    + opt_h.starter_test()
                    + ' -cclpOutput ' + cascade_test_eval_fname
                    + opt_h.filter_flag(), fh=results_fh,
                    verbose=opt_h.verbose())

            cascade_expand_last = cascade_expand

            log('running cascade level ' + str(cascade_iter))

            # build term frequency map from last train output
            cascade_train_out_fname = new_cascade_train_out_fname
            phrasal_terms = PhrasalTerms(cascade_train_out_fname)

            # create next-run train
            next_run_train_fname = cascade_dir + '/next-train'
            phrasal_terms.write_new_dataset(cascade_train_out_fname,
                                            next_run_train_fname)

            # run chunker, output re-chunked train
            new_cascade_dir = '%s/cascade%02d' % (opt_h.output(), cascade_iter)
            makedirs(new_cascade_dir)
            cascade_train_out = '%s/train-out' % new_cascade_dir
            run_cmd(basic_cmd
                    + ' -train ' + next_run_train_fname
                    + ' -trainFileType SPL '
                    + ' -test ' + next_run_train_fname
                    + ' -testFileType SPL '
                    + output_file_type
                    + ' -output ' + cascade_train_out,
                    verbose=opt_h.verbose())

            # if re-chunked train is the same as orig, break
            new_cascade_train_out_fname = get_output_fname(cascade_train_out)
            if filecmp(cascade_train_out_fname, new_cascade_train_out_fname):
                break

            # create next-run test
            cascade_test_out = '%s/test-out' % new_cascade_dir
            next_run_test_fname = cascade_dir + '/next-test'
            phrasal_terms.write_new_dataset(cascade_test_out_fname,
                                            next_run_test_fname)

            # run the chunker, output re-chunked test

            run_cmd(basic_cmd
                    + ' -train ' + next_run_train_fname
                    + ' -trainFileType SPL '
                    + ' -test ' + next_run_test_fname
                    + ' -testFileType SPL '
                    + output_file_type
                    + ' -output ' + cascade_test_out,
                    verbose=opt_h.verbose())

            cascade_dir = new_cascade_dir
            cascade_iter += 1

        results_fh.close()

    else:
        cmd = opt_h.basic_cmd()

        output_flag = ''
        if opt_h.stdout():
            output_flag = ' -output -'

        elif opt_h.output() is not None:
            opt_h.check_output()
            output_flag = ' -output ' + opt_h.output()

        cmd += ' -outputType ' + opt_h.output_type()

        cmd += output_flag
        cmd += opt_h.starter_train()
        cmd += opt_h.starter_test()
        cmd += opt_h.filter_flag()

        cmd += ' -E PRCL -e CLUMP,NPS,TREEBANKPREC'
        run_cmd(cmd, verbose=opt_h.verbose())
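The `while True` loop above terminates on a fixed point: each cascade level re-chunks the training data, and `filecmp` on the previous and current output files detects when a pass no longer changes anything. The same pattern in isolation, with a hypothetical `transform` that reads one file and writes another:

import filecmp
import shutil

def run_until_stable(transform, infile, outfile, max_iters=50):
    """Reapply `transform` until its output stops changing (a fixed point)."""
    for _ in range(max_iters):
        transform(infile, outfile)  # hypothetical: derives outfile from infile
        if filecmp.cmp(infile, outfile, shallow=False):
            return True             # output identical to input: converged
        shutil.copyfile(outfile, infile)  # feed the result back in
    return False                    # gave up before reaching a fixed point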