def test_testing(self):
    self.assertTrue(True)
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
    network = dlnn.get_model()
    # Predict on a single corpus row, so a batch of one is all that is needed.
    result = network.predict(corpus_data[0].reshape((1, corpus_data.shape[1])),
                             batch_size=1)
    print(result.argmax(axis=1))
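# A minimal sketch (not part of the original tests) of scoring every corpus
# row in one call; `corpus_data`, Dlnn, and DLNN_DEFAULT_CONFIG are the same
# names used above, while this helper itself is an illustrative addition.
def predict_corpus_classes():
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    network = Dlnn(**DLNN_DEFAULT_CONFIG).get_model()
    probs = network.predict(corpus_data, batch_size=corpus_data.shape[0])
    return probs.argmax(axis=1)  # one zero-based class index per row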
def test_baked_dlnn(self):
    self.assertTrue(True)
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    # Corpus labels are one-based; shift them to zero-based class indices.
    train_eval = Dlnn(**DLNN_DEFAULT_CONFIG).train(corpus_data, corpus_label - 1)
    self.assertIsNotNone(train_eval)
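# A hedged sketch of a stricter variant of the test above. It assumes
# train() returns a Keras evaluate()-style (loss, accuracy) pair, which the
# source does not confirm; the method name is likewise an assumption.
def test_baked_dlnn_metrics(self):
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    train_eval = Dlnn(**DLNN_DEFAULT_CONFIG).train(corpus_data, corpus_label - 1)
    loss, accuracy = train_eval  # assumed ordering
    self.assertGreaterEqual(accuracy, 0.0)
    self.assertLessEqual(accuracy, 1.0)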
def proceed_document(doc_id):
    import numpy
    import random
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
    # Todo: load the document by id (doc_id) [Dokumen.objects.filter(id=doc_id).first()]
    # Todo: load the PDF
    # Random placeholders until the real feature extractors are wired in.
    f1 = random.randint(50, 250)  # Todo: f1 = compute feature 1 [calculate_feature_1()]
    f2 = random.randint(50, 250)  # Todo: f2 = compute feature 2 [calculate_feature_2()]
    f3 = random.randint(50, 250)  # Todo: f3 = compute feature 3 [calculate_feature_3()]
    f4 = random.randint(50, 250)  # Todo: f4 = compute feature 4 [calculate_feature_4()]
    # Todo: store features f[1..4] in the database
    network = dlnn.get_model()
    result = network.predict(numpy.array([[f1, f2, f3, f4]]), batch_size=1)
    class_data = result.argmax(axis=1)[0]
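# A hypothetical sketch of the calculate_feature_1() helper named in the
# TODOs above; the real extractor is not defined in this file, so the body
# below (a raw token count) is illustrative only.
def calculate_feature_1(text):
    import nltk
    tokens = nltk.word_tokenize(text.lower(), preserve_line=True)
    return len(tokens)  # placeholder feature: number of tokens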
def test_baked_dlnn_value(self):
    self.assertTrue(True)
    import keras
    import numpy
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    yc = keras.utils.to_categorical(label_init, len(numpy.unique(label_init)))
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
    network = dlnn.get_model()
    train_eval = network.evaluate(corpus_data, yc)
    self.assertIsNotNone(train_eval)
    # network.summary()
    # print(train_eval)
    from keras import Model

    # Compare each intermediate layer's output against its precomputed
    # reference array, layer by layer.
    layer_name = 'pre_tiling'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output, normalized, rtol=1e-6))

    layer_name = 'cnn_conv_1'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_1, rtol=1e-6))

    layer_name = 'cnn_activation_1'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_2, rtol=1e-6))

    layer_name = 'cnn_conv_2'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_3, rtol=1e-6))

    layer_name = 'cnn_activation_2'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_4, rtol=1e-6))

    layer_name = 'cnn_pooling_1'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_5, rtol=1e-6))

    layer_name = 'cnn_conv_3'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_6, rtol=1e-6))

    layer_name = 'cnn_activation_3'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_7, rtol=1e-6))

    # The last pooling layer is checked on the first sample and on the
    # full batch output.
    layer_name = 'cnn_pooling_2'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output[0], corr_step_8, rtol=1e-6))
    self.assertTrue(numpy.allclose(intermediate_output, corr_step_8_full, rtol=1e-6))

    layer_name = 'bridge_flatten'
    intermediate = Model(inputs=network.input, outputs=network.get_layer(layer_name).output)
    intermediate_output = intermediate.predict(corpus_data)
    self.assertTrue(numpy.allclose(intermediate_output, corr_step_9, rtol=1e-6))
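# The per-layer checks above all follow one pattern; a sketch of a
# data-driven variant using the same layer-name/reference-array pairs
# (layers compared on the first sample only, as in the assertions above).
# The helper name is an assumption, not part of the source.
def assert_layer_outputs(self, network, expected_by_layer):
    import numpy
    from keras import Model
    for layer_name, expected in expected_by_layer:
        probe = Model(inputs=network.input,
                      outputs=network.get_layer(layer_name).output)
        output = probe.predict(corpus_data)
        self.assertTrue(numpy.allclose(output[0], expected, rtol=1e-6))

# e.g. assert_layer_outputs(self, network, [('cnn_conv_1', corr_step_1),
#                                           ('cnn_activation_1', corr_step_2)])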
def proceed_document(dokumen_id):
    import re
    import nltk
    import numpy
    import pdftotext
    from django.conf import settings
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)

    # Load the document by id and mark it as being processed.
    dokumen = Dokumen.objects.filter(id=dokumen_id).first()
    dokumen.state = "Process"
    dokumen.save()

    # Load the PDF and extract its text.
    # spell = SpellChecker()
    with open(dokumen.filenya.path, "rb") as f:
        pdf = pdftotext.PDF(f)
    text = "".join(pdf)

    # Normalization: split the text into individual words.
    text = text.lower()  # converting to lowercase
    cleanr = re.compile('<.*?>')
    sentence = re.sub(cleanr, ' ', text)  # removing HTML tags
    sentence = re.sub(r'[?|!|\'|"|#]', r'', sentence)
    sentence = re.sub(r'[.|,|)|(|\|/]', r' ', sentence)  # removing punctuation
    token_data_pdf = nltk.word_tokenize(sentence, preserve_line=True)

    # Feature 1 - count tokens missing from the Indonesian dictionary.
    url_dic_indo = settings.STATIC_ROOT + '/admin/db_text/kamus_indonesia.txt'
    with open(url_dic_indo, "r") as kamus_indonesia:
        katadasar = kamus_indonesia.read().split('\n')
    # Entries are "word/flags" lines; keep only the word.
    katadasar = [entry.split("/")[0] for entry in katadasar]
    salah_ketik_indo = 0
    for token in token_data_pdf:
        if token not in katadasar:
            salah_ketik_indo += 1
    f1 = salah_ketik_indo
    dokumen.fitur1 = f1
    dokumen.save()

    # Feature 2 - count tokens missing from the English dictionary.
    url_dic_en = settings.STATIC_ROOT + '/admin/db_text/kamus_english.txt'
    with open(url_dic_en, "r") as kamus_inggris:
        katadasar_en = kamus_inggris.read().split('\n')
    katadasar_en = [entry.split("/")[0] for entry in katadasar_en]
    salah_ketik_english = 0
    for token in token_data_pdf:
        if token not in katadasar_en:
            salah_ketik_english += 1
    f2 = salah_ketik_english
    dokumen.fitur2 = f2
    dokumen.save()

    # Features 3..6 come from dedicated helpers and are stored as they
    # are computed.
    f3, f4 = calculate_feature_34(dokumen_id)
    dokumen.fitur3 = f3
    dokumen.fitur4 = f4
    dokumen.save()
    f5, f6 = calculate_feature_56(dokumen_id)
    dokumen.fitur5 = f5
    dokumen.fitur6 = f6
    dokumen.save()

    # Classify the document from its six features.
    network = dlnn.get_model()
    result = network.predict(numpy.array([[f1, f2, f3, f4, f5, f6]]), batch_size=1)
    class_data = result.argmax(axis=1)[0]
    # print("Class Data {}".format(class_data))

    # Store class_data as the document's quality class (zero-based index;
    # map it to the intended class labels when presenting it).
    dokumen.kualitas = class_data
    dokumen.state = "Done"
    dokumen.save()
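# The two spell-check blocks in proceed_document() share their logic; a
# sketch of a shared helper (name and signature are assumptions, not part
# of the source). A set makes the membership test O(1) instead of a list scan.
def count_dictionary_misses(tokens, dictionary_path):
    with open(dictionary_path, "r") as handle:
        # Entries are "word/flags" lines; keep only the word.
        known = {line.split("/")[0] for line in handle.read().split('\n')}
    return sum(1 for token in tokens if token not in known)

# e.g. f1 = count_dictionary_misses(token_data_pdf, url_dic_indo)
#      f2 = count_dictionary_misses(token_data_pdf, url_dic_en)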