def main(_):
    """Train the classifier: load labeled docs, embed them, fit, and save.

    Pipeline: fetch labeled documents, load the word-embedding model selected
    by ``FLAGS.we_model``, embed every document's text, train the classifier
    (checkpointing to ``./checkpoint``), and persist it to ``FLAGS.cf_model``.

    Args:
        _: unused positional arg (absl ``app.run`` passes argv here).

    Raises:
        ValueError: if ``FLAGS.we_model`` is not ``'devblog'`` or ``'wiki'``.
    """
    # init
    we = WordEmbedding()
    dc = Document()
    cf = Classifier()

    # load data (labeled documents only — we are training a classifier)
    docs = dc.getDocs(labeled_only=True)

    # load word embedding model
    if FLAGS.we_model == 'devblog':
        we_model = we.loadDevblogModel(
            embedding_dim=FLAGS.we_dim,
            epochs=FLAGS.we_epoch,
            window=FLAGS.we_window,
            min_count=FLAGS.we_min_count)
        # devblog model was trained on jamo-decomposed text, so decompose
        # the Korean text the same way (han2jamo)
        docs.text = docs.text.apply(han2Jamo)
    elif FLAGS.we_model == 'wiki':
        we_model = we.loadWikiModel()
    else:
        # fail fast with a clear message instead of a NameError on we_model
        # at the embedding step below
        raise ValueError(
            "unknown we_model: {!r} (expected 'devblog' or 'wiki')".format(
                FLAGS.we_model))

    # word embedding: one vector per document
    docs.vector = docs.text.apply(lambda x: we.embedding(we_model, x))

    # training, then persist the fitted model
    cf_model = cf.train(docs, './checkpoint')
    cf.saveModel(cf_model, FLAGS.cf_model)
def main(_):
    """Predict labels for the texts listed in ``FLAGS.predict``.

    Loads the word-embedding model selected by ``FLAGS.we_model`` and the
    saved classifier, then embeds and classifies each input text.

    Args:
        _: unused positional arg (absl ``app.run`` passes argv here).

    Returns:
        A list of dicts, one per input text: ``{'text': ..., 'predict': ...}``.
        Texts whose preprocessing yields no tokens are reported and skipped
        (their dict has no ``'predict'`` key).

    Raises:
        ValueError: if ``FLAGS.we_model`` is not ``'devblog'`` or ``'wiki'``.
    """
    # init
    we = WordEmbedding()
    dc = Document()
    cf = Classifier()

    # load word embedding model
    if FLAGS.we_model == 'devblog':
        we_model = we.loadDevblogModel(
            embedding_dim=FLAGS.we_dim,
            epochs=FLAGS.we_epoch,
            window=FLAGS.we_window,
            min_count=FLAGS.we_min_count)
    elif FLAGS.we_model == 'wiki':
        we_model = we.loadWikiModel()
    else:
        # fail fast instead of a NameError on we_model in the loop below
        raise ValueError(
            "unknown we_model: {!r} (expected 'devblog' or 'wiki')".format(
                FLAGS.we_model))

    # load classifier model
    cf_model = cf.loadModel(FLAGS.cf_model)

    results = [{'text': r} for r in FLAGS.predict]
    # devblog embeddings were trained on jamo-decomposed text
    is_devblog = FLAGS.we_model == 'devblog'

    for i, r in enumerate(FLAGS.predict):
        # preprocessing (jamo-decompose only for the devblog model)
        text = han2Jamo(r) if is_devblog else r
        df = dc.preprocessing(text, devblog=is_devblog)

        # word embedding: one vector per preprocessed sentence/token row
        vector = df.text.apply(
            lambda x: we.embedding(we_model, x, FLAGS.we_dim)).tolist()

        if not vector:
            # BUGFIX: previously this `return`ed None here, silently
            # discarding every other prediction; skip just this text instead
            print('🐈 text is not valid :', r)
            continue

        # predict
        results[i]['predict'] = cf.predict(
            cf_model, np.array(vector), FLAGS.criterion)

    return results