def do_training(indices, training_blob, heldout_blob, weights, weights_out, debiasing_weights):
    """Driver for parallel perceptron training.

    Runs FLAGS.maxepochs epochs of perceptron training; after each epoch the
    averaged weight vector is pickled to weights_out (rank 0 only) and,
    optionally, the heldout data is re-decoded with the new weights.
    """
    # De-biasing mode: restrict learning to features appearing in a supplied
    # initial weight vector (features "selected" by an earlier regularized run).
    selected_feature_names = None
    if FLAGS.debiasing:
        selected_feature_names = getFeatureNames(debiasing_weights)

    for epoch in range(FLAGS.maxepochs):
        # Rank 0 shuffles the example order; the order is then broadcast so
        # every process partitions the epoch over the same permutation.
        if myRank == 0 and FLAGS.shuffle:
            random.shuffle(indices)
        indices = mpi.broadcast(value=indices, root=0)

        # --- SEARCH: find 1-best under the current model (one training epoch)
        io_helper.write_master("===EPOCH %d TRAINING===\n" %(epoch))
        newWeights_avg = perceptron_parallel(epoch, indices, training_blob,
                                             weights, selected_feature_names)

        # --- Persist this iteration's averaged weights (master process only),
        # flushing so partial results survive an interrupted run.
        if myRank == 0:
            cPickle.dump(newWeights_avg, weights_out,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            weights_out.flush()

        # --- Re-decode the dev corpus with the freshly learned weights;
        # this reports a heldout f-score.
        if FLAGS.decodeheldout:
            io_helper.write_master("===EPOCH %d DECODE HELDOUT===\n" %(epoch))
            decode_parallel(newWeights_avg, indices_dev, heldout_blob, "dev")

    # NOTE(review): close() is placed after the epoch loop — closing inside the
    # loop would break later dumps; confirm against the original layout.
    if myRank == 0:
        weights_out.close()
def do_training(indices, training_blob, heldout_blob, weights, weights_out, debiasing_weights):
    """Driver for parallel perceptron training.

    Loads all training instance streams into memory, then runs
    FLAGS.maxepochs epochs. Each epoch rebuilds in-memory "virtual files"
    whose lines follow the (possibly shuffled) example order, trains one
    perceptron epoch, JSON-dumps the averaged weights (rank 0 only), and
    optionally re-decodes the heldout data.
    """
    # De-biasing mode: restrict learning to features appearing in a supplied
    # initial weight vector (features "selected" by an earlier regularized run).
    selected_feature_names = None
    if FLAGS.debiasing:
        selected_feature_names = getFeatureNames(debiasing_weights)

    # Load every present instance stream fully into memory so it can be
    # re-ordered per epoch without re-reading from disk.
    instance_keys = ['f_instances', 'e_instances', 'etree_instances',
                     'ftree_instances', 'gold_instances', 'a1_instances',
                     'a2_instances', 'inverse_instances']
    active_instances = [k for k in instance_keys if training_blob[k] is not None]
    for k in active_instances:
        training_blob[k+'_unshuffled'] = training_blob[k].readlines()

    for epoch in range(FLAGS.maxepochs):
        # Rank 0 shuffles the example order; the order is then broadcast so
        # every process partitions the epoch over the same permutation.
        if myRank == 0 and FLAGS.shuffle:
            random.shuffle(indices)
        indices = mpi.bcast(indices, root=0)

        # Rebuild virtual files with lines emitted in the shuffled order.
        for k in active_instances:
            virtual_file = StringIO.StringIO()
            lines = training_blob[k+'_unshuffled']
            for idx in indices:
                virtual_file.write(lines[idx])
            virtual_file.seek(0)
            training_blob[k] = virtual_file

        # --- SEARCH: find 1-best under the current model (one training epoch)
        io_helper.write_master("===EPOCH %d TRAINING===\n" %(epoch))
        newWeights_avg = perceptron_parallel(epoch, indices, training_blob,
                                             weights, selected_feature_names)

        # --- Persist this iteration's averaged weights (master process only),
        # one JSON object per line, flushed so partial results survive crashes.
        if myRank == 0:
            json.dump(newWeights_avg, weights_out)
            weights_out.write('\n')
            weights_out.flush()

        # --- Re-decode the dev corpus with the freshly learned weights;
        # this reports a heldout f-score.
        if FLAGS.decodeheldout:
            io_helper.write_master("===EPOCH %d DECODE HELDOUT===\n" %(epoch))
            decode_parallel(newWeights_avg, indices_dev, heldout_blob, "dev")

        # Rewind heldout streams so the next epoch's decode re-reads them.
        # NOTE(review): assumes heldout_blob has the same non-None keys as
        # training_blob — verify against the caller.
        if FLAGS.decodeheldout:
            for k in active_instances:
                heldout_blob[k].seek(0)

    # NOTE(review): close() is placed after the epoch loop — closing inside the
    # loop would break later dumps; confirm against the original layout.
    if myRank == 0:
        weights_out.close()