# Check and process input arguments.
args = parse_args(sys.argv[1:])
for arg in ['input', 'output']:
    if arg not in args:
        logger.error('Argument ' + arg + ' is missing')
        sys.exit(1)
limit, workers, min_count, size, input, output = args['limit'], args['workers'], args['min_count'], \
    args['size'], args['input'], args['output']

# Log arguments.
# FIX: dict.iteritems() is Python 2 only and raises AttributeError on
# Python 3; the `except Exception as exc` below is Python 3 syntax, so
# this file targets Python 3 — use items().
logger.info('Training with: ' + ' '.join([k + " : " + str(v) for k, v in args.items()]))

# Import sentences.
# NOTE(review): `slice` here is a project helper that shadows the builtin;
# presumably it truncates the sentence stream to `limit` items — confirm
# against its definition.
sentences = slice(LineSentence(input), limit)

try:
    # Train the word2vec model.
    model = Word2Vec(sentences, min_count=min_count, workers=workers, size=size)
    # Save the model; the limit is appended so runs with different
    # corpus sizes do not overwrite each other.
    logger.info('Saving model to file')
    model.save(output + "_" + str(limit))
    logger.info('Model has been saved.')
except Exception as exc:
    # Top-level boundary: log the full traceback and fall through.
    logger.exception('Exception in model training: ' + str(exc))

logger.info('Done!')
def train_net(net, epochs=5, batch_size=1, lr=0.1, val_percent=0.05, save_cp=True, gpu=False, epoch_size=10, window=512, obs_size=10):
    """Train `net` on the helio dataset using SGD and binary cross-entropy.

    Args:
        net: model to train; must expose .parameters(), .train(), __call__
            and .state_dict().
        epochs: number of passes over the data loader.
        batch_size: images consumed per optimizer step.
        lr: SGD learning rate.
        val_percent: currently unused; kept for interface compatibility.
        save_cp: when True, save a checkpoint after every epoch.
        gpu: when True, move each batch to CUDA before the forward pass.
        epoch_size: forwarded to HelioDataset (number of observations).
        window: window size handed to the project `slice` helper.
        obs_size: number of windows retained by `keep_best`.
    """
    print('''Starting training:
        Epochs: {}
        Batch : {}
        Learning rate: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, str(save_cp), str(gpu)))

    dir_checkpoint = 'checkpoints/'
    dataset = HelioDataset('data/SIDC_dataset.csv', 'data/fenyi', epoch_size)
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=0.0005)
    bce = nn.BCELoss()
    data_loader = DataLoader(dataset)

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()

        for obs in data_loader:
            # NOTE(review): `slice` is a project helper shadowing the
            # builtin — presumably it windows the observation with a
            # half-window overlap; `keep_best` keeps obs_size of them.
            obs = keep_best(slice(obs, window, window // 2), obs_size)

            total = len(obs['imgs'])
            for start in range(0, total, batch_size):
                stop = start + batch_size
                imgs = obs['imgs'][start:stop].float()
                true_masks = obs['masks'][start:stop].float()
                if gpu:
                    imgs = imgs.cuda()
                    true_masks = true_masks.cuda()

                # Forward pass; flatten predictions and targets so BCE
                # sees matching 1-D tensors.
                predicted_flat = net(imgs).view(-1)
                target_flat = true_masks.view(-1)
                loss = bce(predicted_flat, target_flat)
                print(int(start), '-', int(stop), '> loss: {0:.6f}'.format(loss.item()))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        print('Epoch finished!')

        if save_cp:
            torch.save(net.state_dict(), dir_checkpoint + 'CP512-{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
if __name__ == '__main__':
    # Set up logging.
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info("running: %s" % ' '.join(sys.argv))

    # Check and process input arguments.
    # FIX: args['output'] was read below without being validated, so a
    # missing 'output' crashed with an unhandled KeyError; validate both
    # required arguments the same way the training script does.
    args = parse_args(sys.argv[1:])
    for arg in ['input', 'output']:
        if arg not in args:
            logger.error('Argument ' + arg + ' is missing')
            sys.exit(1)

    # Get args.  NOTE(review): 'limit' is assumed to be supplied (or
    # defaulted) by parse_args — confirm against its definition.
    inp, outp, limit = args['input'], args['output'], args['limit']

    # Prepare corpus.
    # NOTE(review): `slice` is a project helper shadowing the builtin;
    # presumably it truncates the text stream to `limit` items.
    wiki = WikiCorpus(inp, lemmatize=False, dictionary={})
    texts = slice(wiki.get_texts(), limit)

    # Save this for efficiency.
    # FIX: use a context manager so the output file is closed even if
    # iteration raises partway through.
    space = " "
    with open(outp, 'w') as output:
        iterate_with_logging(logger, 10000, texts,
                             lambda text: output.write(space.join(text) + "\n"))