예제 #1
0
def train(args):
    """Train a Caspar parser as configured by the command-line args.

    Optionally shuffles the training corpus, builds a commons store when
    one is missing, constructs the Spec and Caspar model (with optional
    pretrained word embeddings), and runs the Trainer, writing outputs
    under args.output_folder.
    """
    check_present(
        args,
        ["train_corpus", "output_folder", "dev_corpus", "train_shuffle_seed"])

    corpus_path = args.train_corpus

    # A positive seed requests a deterministic shuffle of the training
    # records into a new record file under the output folder.
    if args.train_shuffle_seed > 0:
        reader = sling.RecordReader(args.train_corpus)
        records = list(reader)
        reader.close()

        rng = random.Random(args.train_shuffle_seed)
        rng.shuffle(records)

        corpus_path = os.path.join(args.output_folder,
                                   "train_shuffled.rec")
        writer = sling.RecordWriter(corpus_path)
        for key, value in records:
            writer.write(key, value)
        writer.close()
        print("Wrote shuffled train corpus to %s using seed %d" % \
              (corpus_path, args.train_shuffle_seed))

    # Setting an explicit seed for the sake of determinism.
    torch.manual_seed(1)

    # Create a commons store when none was given or the path is absent.
    if args.commons == '' or not os.path.exists(args.commons):
        if args.commons == '':
            fname = os.path.join(args.output_folder, "commons")
            print("Will create a commons store at", fname)
            args.commons = fname
        else:
            print("No commons found at", args.commons, ", creating it...")
        _, symbols = commons_builder.build(
            [corpus_path, args.dev_corpus], args.commons)
        print("Commons created at", args.commons, "with", len(symbols), \
            "symbols besides the usual ones.")

    # Training spec ties the commons store to the (possibly shuffled) corpus.
    spec = Spec()
    spec.build(args.commons, corpus_path)

    # Model, optionally primed with pretrained word embeddings
    # (an empty path means "none").
    caspar = Caspar(spec)
    caspar.initialize(args.word_embeddings or None)

    # Scratch directory used by the dev-set evaluator.
    tmp_folder = os.path.join(args.output_folder, "tmp")
    if not os.path.exists(tmp_folder):
        os.makedirs(tmp_folder)

    evaluator = partial(dev_accuracy, args.dev_corpus, tmp_folder)

    output_file_prefix = os.path.join(args.output_folder, "caspar")
    hyperparams = Hyperparams(args)
    print("Using hyperparameters:", hyperparams)

    trainer = Trainer(caspar, hyperparams, evaluator, output_file_prefix)
    gold_corpora = Corpora(corpus_path, spec.commons, gold=True)
    trainer.train(gold_corpora)
예제 #2
0
def build_bk_wrapper(params):
    """Wrapper for calculating power/bispectrum.

    Args:
        params: sequence whose first element is the catalog/correction
            spec (``cat_corr``) and whose optional second element is a
            dict of keyword arguments forwarded to ``Spec``.

    Returns:
        None; the spectrum file is printed and built as a side effect.
    """
    cat_corr = params[0]
    # Bug fix: kwargs sit at index 1, which exists whenever the sequence
    # has at least two elements; the original guard `len(params) > 2`
    # silently dropped the kwargs of a two-element params list.
    kwargs = params[1] if len(params) > 1 else {}

    spectrum = Spec('bk', cat_corr, **kwargs)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(spectrum.file())
    spectrum.build()

    return None
예제 #3
0
def train(args):
  """Train a Caspar parser end-to-end (Python 2 variant).

  Builds a commons store from the train/dev corpora when needed,
  constructs the training Spec and Caspar model (optionally loading
  pretrained word embeddings), and runs the Trainer, writing model
  checkpoints under args.output_folder.
  """
  check_present(args, ["train_corpus", "output_folder", "dev_corpus"])

  # Setting an explicit seed for the sake of determinism.
  torch.manual_seed(1)

  # Make commons store if needed.
  if args.commons == '' or not os.path.exists(args.commons):
    if args.commons == '':
      fname = os.path.join(args.output_folder, "commons")
      print "Will create a commons store at", fname
      args.commons = fname
    else:
      print "No commons found at", args.commons, ", creating it..."
    # Build the store from symbols observed in both corpora.
    _, symbols = commons_builder.build(
      [args.train_corpus, args.dev_corpus], args.commons)
    print "Commons created at", args.commons, "with", len(symbols), \
      "symbols besides the usual ones."

  # Make the training spec.
  spec = Spec()
  spec.build(args.commons, args.train_corpus)

  # Initialize the model with the spec and any word embeddings.
  caspar = Caspar(spec)
  embeddings_file = args.word_embeddings
  # An empty path means "no pretrained embeddings".
  if embeddings_file == '': embeddings_file = None
  caspar.initialize(embeddings_file)

  # Scratch directory used by the evaluator for intermediate files.
  tmp_folder = os.path.join(args.output_folder, "tmp")
  if not os.path.exists(tmp_folder):
    os.makedirs(tmp_folder)

  # Partially-applied evaluator; presumably the trainer supplies the
  # remaining argument(s) (e.g. the model) at evaluation time — confirm
  # against dev_accuracy's signature.
  evaluator = partial(dev_accuracy,
                      args.commons,
                      args.dev_corpus,
                      tmp_folder)

  output_file_prefix = os.path.join(args.output_folder, "caspar")
  hyperparams = Hyperparams(args)
  print "Using hyperparameters:", hyperparams

  trainer = Trainer(caspar, hyperparams, evaluator, output_file_prefix)
  # NOTE(review): local 'train' shadows this function for the rest of
  # the body; gold=True marks the corpora as gold annotations.
  train = Corpora(args.train_corpus, spec.commons, gold=True)
  trainer.train(train)