Example 1
 def test_testing(self):
     """Smoke-test prediction: build the default DLNN and predict one sample.

     Prints the argmax class index of the prediction; relies on the
     module-level `corpus_data` array (assumed 2-D: samples x features —
     TODO confirm).
     """
     self.assertTrue(True)
     from dlnn.Dlnn import DLNN_DEFAULT_CONFIG, Dlnn
     model = Dlnn(**DLNN_DEFAULT_CONFIG).get_model()
     # Reshape the first row into a single-sample batch.
     sample = corpus_data[0].reshape((1, corpus_data.shape[1]))
     prediction = model.predict(sample, batch_size=corpus_data.shape[0])
     print(prediction.argmax(axis=1))
Example 2
 def test_baked_dlnn(self):
     """Smoke-test training: train the default DLNN and check a result exists.

     Labels are shifted to zero-based indexing (`corpus_label - 1`) before
     training; relies on module-level `corpus_data` / `corpus_label`.
     """
     self.assertTrue(True)
     from dlnn.Dlnn import DLNN_DEFAULT_CONFIG, Dlnn
     evaluation = Dlnn(**DLNN_DEFAULT_CONFIG).train(corpus_data,
                                                    corpus_label - 1)
     self.assertIsNotNone(evaluation)
Example 3
def proceed_document(doc_id):
    """Classify a document with the default DLNN using placeholder features.

    The four features are currently random stand-ins (see TODOs); the real
    feature extractors and the database round-trips are not implemented yet.
    """
    import numpy
    import random
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
    # Todo : Load Dokumen by id (doc_id) [Dokumen.objects.filter(id=doc_id).first()]
    # Todo : Load pdf
    # Placeholder features until the real extractors exist:
    # Todo : f1 = find feature 1 [calculate_feature_1()]
    f1 = random.randint(50, 250)
    # Todo : f2 = find feature 2 [calculate_feature_2()]
    f2 = random.randint(50, 250)
    # Todo : f3 = find feature 3 [calculate_feature_3()]
    f3 = random.randint(50, 250)
    # Todo : f4 = find feature 4 [calculate_feature_4()]
    f4 = random.randint(50, 250)
    # Todo : store features f[1..4] in the database
    model = dlnn.get_model()
    scores = model.predict(numpy.array([[f1, f2, f3, f4]]), batch_size=1)
    class_data = scores.argmax(axis=1)[0]
Example 4
    def test_baked_dlnn_value(self):
        """Evaluate the baked DLNN and verify every intermediate layer output.

        First evaluates the full network against one-hot labels, then probes
        each named layer with a sub-model and compares its activations on
        `corpus_data` against the precomputed reference arrays
        (`normalized`, `corr_step_1` .. `corr_step_9`, module-level globals).

        The original code repeated the probe stanza once per layer; it is now
        a single data-driven loop, preserving the original check order.
        """
        self.assertTrue(True)
        from dlnn.Dlnn import Dlnn
        from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
        yc = keras.utils.to_categorical(label_init,
                                        len(numpy.unique(label_init)))
        dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
        network = dlnn.get_model()
        train_eval = network.evaluate(corpus_data, yc)
        self.assertIsNotNone(train_eval)
        # network.summary()
        # print(train_eval)

        from keras import Model

        # (layer_name, expected_array, compare_first_sample_only)
        # Some references cover only the first sample ([0]); others cover the
        # whole batch — mirrors the original per-layer assertions exactly.
        expectations = [
            ('pre_tiling', normalized, False),
            ('cnn_conv_1', corr_step_1, True),
            ('cnn_activation_1', corr_step_2, True),
            ('cnn_conv_2', corr_step_3, True),
            ('cnn_activation_2', corr_step_4, True),
            ('cnn_pooling_1', corr_step_5, True),
            ('cnn_conv_3', corr_step_6, True),
            ('cnn_activation_3', corr_step_7, True),
            ('cnn_pooling_2', corr_step_8, True),
            ('cnn_pooling_2', corr_step_8_full, False),
            ('bridge_flatten', corr_step_9, False),
        ]
        for layer_name, expected, first_only in expectations:
            intermediate = Model(inputs=network.input,
                                 outputs=network.get_layer(layer_name).output)
            intermediate_output = intermediate.predict(corpus_data)
            actual = intermediate_output[0] if first_only else intermediate_output
            self.assertTrue(numpy.allclose(actual, expected, rtol=1e-6))
Example 5
def _count_oov_tokens(tokens, dictionary_path):
    """Count tokens not found in a newline-separated dictionary file.

    Each dictionary line may carry '/'-separated affix metadata; only the
    part before the first '/' is the headword. Uses a set for O(1)
    membership tests (the original scanned a list per token) and closes the
    file deterministically (the original leaked the handle).
    """
    with open(dictionary_path, "r") as fh:
        headwords = {line.split("/")[0] for line in fh.read().split('\n')}
    return sum(1 for token in tokens if token not in headwords)


def proceed_document(dokumen_id):
    """Extract features from a stored PDF document and classify its quality.

    Pipeline: load the Dokumen row, extract PDF text, normalize it, compute
    six features (two misspelling counts plus two helper-computed pairs),
    persist each feature, then run the DLNN and store the predicted class.
    The document's `state` tracks progress ("Process" -> "Done").
    """
    import numpy
    from dlnn.Dlnn import Dlnn
    from dlnn.Dlnn import DLNN_DEFAULT_CONFIG
    dlnn = Dlnn(**DLNN_DEFAULT_CONFIG)
    dokumen = Dokumen.objects.filter(id=dokumen_id).first()
    dokumen.state = "Process"
    dokumen.save()
    # Load the PDF and flatten all pages into one text string.
    # spell = SpellChecker()
    with open(dokumen.filenya.path, "rb") as f:
        pdf = pdftotext.PDF(f)
        text = "".join(pdf)

    # Normalization: lowercase, strip HTML tags, remove punctuation.
    text = text.lower()
    cleanr = re.compile('<.*?>')
    sentence = re.sub(cleanr, ' ', text)  # Removing HTML tags
    sentence = re.sub(r'[?|!|\'|"|#]', r'', sentence)
    sentence = re.sub(r'[.|,|)|(|\|/]', r' ',
                      sentence)  # Removing Punctuations

    data_pdf = "".join(sentence)
    token_data_pdf = nltk.word_tokenize(data_pdf, preserve_line=True)

    # Feature 1 - Indonesian misspelling count (tokens missing from the
    # Indonesian dictionary).
    url_dic_indo = settings.STATIC_ROOT + '/admin/db_text/kamus_indonesia.txt'
    f1 = _count_oov_tokens(token_data_pdf, url_dic_indo)
    dokumen.fitur1 = f1
    dokumen.save()

    # Feature 2 - English misspelling count.
    url_dic_en = settings.STATIC_ROOT + '/admin/db_text/kamus_english.txt'
    f2 = _count_oov_tokens(token_data_pdf, url_dic_en)
    dokumen.fitur2 = f2
    dokumen.save()

    # Features 3-6 come from project helpers keyed by document id.
    f3, f4 = calculate_feature_34(dokumen_id)
    dokumen.fitur3 = f3
    dokumen.fitur4 = f4
    dokumen.save()

    f5, f6 = calculate_feature_56(dokumen_id)
    dokumen.fitur5 = f5
    dokumen.fitur6 = f6
    dokumen.save()

    # Classify: single-sample predict; argmax gives the zero-based class.
    network = dlnn.get_model()
    result = network.predict(numpy.array([[f1, f2, f3, f4, f5, f6]]),
                             batch_size=1)
    class_data = result.argmax(axis=1)[0]
    # print("Class Data {}".format(class_data))
    dokumen.kualitas = class_data
    dokumen.state = "Done"
    dokumen.save()