def prepare():
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()

    # read the training corpus and build the vocabulary
    reader = SentenceReaderDir(args.intraindata, args.batchsize)
    print("")
    print("-" * 50)
    print('n_vocab: %d' % (len(reader.word2index) - 3))  # excluding the three special tokens
    print('corpus size: %d' % reader.total_words)

    # convert both corpora to index arrays, capped at MAX_SEN_LEN tokens per sentence
    max_sen_len = MAX_SEN_LEN
    train_data = convert_numeric_data(reader.data, args.batchsize, reader.word2index, max_sen_len)
    test_data, freq, _ = load_xml(args.intestdata)
    test_data = convert_numeric_data(test_data, args.batchsize, reader.word2index, max_sen_len)

    return reader, train_data, test_data
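# `convert_numeric_data` and `load_xml` are defined elsewhere in this repo; the
# function below is only a hypothetical sketch of the contract prepare() seems
# to rely on: words mapped to vocabulary indices via word2index, sentences
# longer than max_sen_len dropped, and the result grouped into batches of
# `batchsize` sentences. The real helper may pad, bucket by length, or insert
# special tokens; this stand-in (note the distinct name) only illustrates the
# expected input/output shapes and uses the module-level numpy import (np).
def convert_numeric_data_sketch(sentences, batchsize, word2index, max_sen_len):
    numeric = [
        np.array([word2index[w] for w in sen if w in word2index], dtype=np.int32)
        for sen in sentences
        if len(sen) <= max_sen_len
    ]
    # group the index arrays into batches of at most `batchsize` sentences
    return [numeric[i:i + batchsize] for i in range(0, len(numeric), batchsize)]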
    # (tail of parse_arguments(); the argparse definitions precede this excerpt)
    return args


args = parse_arguments()

context_word_units = args.unit
lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * args.unit
target_word_units = IN_TO_OUT_UNITS_RATIO * args.unit

if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
xp = cuda.cupy if args.gpu >= 0 else np

reader = SentenceReaderDir(args.indir, args.trimfreq, args.batchsize)
print('n_vocab: %d' % (len(reader.word2index) - 3))  # excluding the three special tokens
print('corpus size: %d' % reader.total_words)

# per-word counts feed the unigram^power negative-sampling distribution
cs = [reader.trimmed_word2count[w] for w in range(len(reader.trimmed_word2count))]
loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power)

if args.context == 'lstm':
    model = BiLstmContext(args.deep, args.gpu, reader.word2index, context_word_units,
                          lstm_hidden_units, target_word_units, loss_func, True, args.dropout)
else:
    raise Exception('Unknown context type: {}'.format(args.context))

optimizer = O.Adam()
optimizer.setup(model)

STATUS_INTERVAL = 1000000
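# The actual epoch loop follows in the full script; the function below is only
# a hedged sketch of a standard Chainer update step using the objects built
# above. The batch source (`train_batches`) and the assumption that calling the
# model on a batch returns the accumulated negative-sampling loss are
# hypothetical, not the real BiLstmContext interface.
def train_sketch(train_batches, n_epoch):
    for epoch in range(n_epoch):
        for batch in train_batches:
            loss = model(batch)    # assumed: forward pass returns a scalar loss Variable
            model.cleargrads()     # reset gradients accumulated in the previous step
            loss.backward()        # backprop through the BiLSTM and NegativeSampling link
            optimizer.update()     # one Adam step for the model set up above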