Beispiel #1
0
 def proses(test_str):
     """Replace words of ``test_str`` that are not base words with the closest base word.

     The input is run through ``preprocessing.PreProcess`` and split on
     whitespace. A word found in the base-word list (first column of the rows
     returned by ``tampil.Tampil_KataDasar()``) is kept unchanged. Any other
     word is compared to every base word via
     ``jaccard.compute_jaccard_similarity_score`` on their ``bigram.urut``
     forms, and the base word with the highest score above 0 replaces it
     (the first one wins on ties).

     If no base word scores above 0 the original word is kept. Previously the
     replacement chosen for an *earlier* word (or an empty string) leaked into
     the result in that case.

     Args:
         test_str: Raw query text.

     Returns:
         The corrected words joined by single spaces.
     """
     katadasar = [row[0] for row in tampil.Tampil_KataDasar()]
     # Set for O(1) membership tests; assumes base words are hashable
     # (presumably strings coming from the database).
     kata_dikenal = set(katadasar)
     # bigram.urut() of every base word, built lazily on the first unknown
     # word and then reused. Assumes bigram.urut is a pure function.
     katadasar_urut = None

     hasil = []
     for kata in preprocessing.PreProcess(test_str).split():
         if kata in kata_dikenal:
             hasil.append(kata)
             continue

         if katadasar_urut is None:
             katadasar_urut = [bigram.urut(dasar) for dasar in katadasar]

         word = bigram.urut(kata)
         # Reset for every word so a failed lookup cannot inherit the
         # previous word's replacement.
         kata_pengganti = kata
         nilai = 0
         for dasar, kata_urut in zip(katadasar, katadasar_urut):
             nilai_tertinggi = jaccard.compute_jaccard_similarity_score(
                 word, kata_urut)
             if nilai_tertinggi > nilai:
                 # Trace output kept from the original implementation.
                 print(kata_urut)
                 print(nilai_tertinggi)
                 nilai = nilai_tertinggi
                 kata_pengganti = dasar
         hasil.append(kata_pengganti)

     return " ".join(hasil)
Beispiel #2
0
def createDictionary():
    """Build the inverted index of the hadith files and save it as JSON.

    For every row returned by ``tampil.Tampil_Hadis()`` the file named in
    column 2 is read from the ``Hadits Bukhari-Muslim/`` folder, passed through
    ``preprocessing.PreProcess``, ``urut`` and ``urut2``, and each resulting
    word is mapped to the list of file paths (folder + file name) that contain
    it. Each path appears at most once per word.

    The mapping is written to ``indexing.txt`` in the current directory.

    Returns:
        None. The result is only written to disk.
    """
    folder = "Hadits Bukhari-Muslim/"
    wordsAdded = {}

    for row in tampil.Tampil_Hadis():
        # Same string that the file object reports as ``f.name``.
        path = folder + row[2]
        with open(path, 'r') as f:
            words = f.read()

        words = preprocessing.PreProcess(words)
        words = urut(words)
        words = urut2(words)

        for word in words:
            postings = wordsAdded.setdefault(word, [])
            # Compare against the stored value (the full path). The previous
            # check used the bare file name, never matched, and therefore
            # appended the same path once per occurrence of the word.
            if path not in postings:
                postings.append(path)

    # NOTE(review): proses_Pencarian loads 'indexing.json', not 'indexing.txt'.
    # Confirm which file name is intended before relying on this output.
    with open('indexing.txt', 'w') as json_file:
        json.dump(wordsAdded, json_file)
def proses_Pencarian(value):
    """Search the indexed hadith documents for ``value`` and rank the matches.

    Steps:
      1. Preprocess the query with ``preprocessing.PreProcess``.
      2. Run it through ``bigram.proses``; if the result differs from the
         preprocessed query it is reported back as the ``typo`` suggestion.
      3. Look up every resulting word in the index loaded from
         ``indexing.json`` and collect the matching document paths
         (first-seen order, duplicates removed).
      4. Fetch the rows of ``tampil.Tampil_Hadis()`` whose path
         (``"Hadits Bukhari-Muslim/"`` + column 2) is among those documents.
      5. Score each row with ``jaccard.compute_jaccard_similarity_score``
         between the query words and ``jaccard.urut`` of column 3, then sort
         by score, highest first.

    Timing information for the individual stages is printed to stdout.

    Args:
        value: Raw query text.

    Returns:
        A tuple ``(typo, rank, waktu_proses1)``: the corrected query string
        (``''`` when nothing changed), a list of ``[path, score]`` pairs sorted
        by descending score, and the total elapsed time in seconds.
    """
    folder = "Hadits Bukhari-Muslim/"
    waktu_awal = time.time()

    waktu_prepro_a = time.time()
    query = preprocessing.PreProcess(value)  # preprocess the query
    waktu_prepro_b = time.time()
    waktu_proses_pre = waktu_prepro_b - waktu_prepro_a
    print("Waktu Proses PrePro query "+str(waktu_proses_pre))

    waktu_bigram_a = time.time()
    koreksi = bigram.proses(query)  # bigram pass over the query
    waktu_bigram_b = time.time()
    waktu_proses_bigram = waktu_bigram_b - waktu_bigram_a
    print("Waktu Proses bigram query "+str(waktu_proses_bigram))

    typo = koreksi if koreksi != query else ''

    # Split the (possibly corrected) query into words.
    kata_query = koreksi.split()

    # Context manager so the handle is released even if json.load raises.
    with open('indexing.json',) as f:
        indexing = json.load(f)

    # Collect the documents listed in the index for each query word.
    hasil = []
    for kata in kata_query:
        if kata in indexing:
            hasil.extend(indexing[kata])

    # Order-preserving de-duplication; index entries are expected to be
    # path strings (hashable).
    doc = list(dict.fromkeys(hasil))

    # Fetch the database rows that belong to the matching documents.
    # Rows are grouped by path once, instead of rescanning every row for
    # every matching document; the resulting order is unchanged
    # (document order first, then row order).
    baris_per_dokumen = {}
    for baris in tampil.Tampil_Hadis():
        baris_per_dokumen.setdefault(folder + baris[2], []).append(baris)
    data = [
        baris
        for alamat in doc
        for baris in baris_per_dokumen.get(alamat, ())
    ]

    # Compute the Jaccard similarity of every document against the query.
    waktu_jaccard_a = time.time()
    rank = []
    for baris in data:
        alamat = folder + baris[2]
        words = jaccard.urut(baris[3])
        nilai = jaccard.compute_jaccard_similarity_score(kata_query, words)
        rank.append([alamat, nilai])
    waktu_jaccard_b = time.time()
    waktu_proses_jaccard = waktu_jaccard_b - waktu_jaccard_a
    print("Waktu Proses jaccard "+str(waktu_proses_jaccard))

    # Sort documents by score, best first (stable for equal scores).
    rank = sorted(rank, key=lambda x: x[1], reverse=True)
    waktu_akhir = time.time()
    waktu_proses1 = waktu_akhir - waktu_awal

    return typo, rank, waktu_proses1