def select_komposisiModel(self):
    """Return the stored sentiment composition of the submitted model.

    Reads ``model_name`` from the submitted form and returns whatever
    ``Models.select()`` yields for the ``sentiment_count``,
    ``sentiment_positive`` and ``sentiment_negative`` columns of the
    matching ``tbl_model`` row(s).
    """
    nama_model = request.form['model_name']
    # NOTE(review): the form value is concatenated into the SQL text, so a
    # crafted model name can change the statement. Other methods in this file
    # bind values through %s placeholders; do the same here once a
    # parameterised SELECT returning the same row shape is confirmed on Models.
    kueri = (
        "SELECT sentiment_count, sentiment_positive, sentiment_negative "
        "FROM tbl_model WHERE model_name = '" + nama_model + "'"
    )
    return Models(kueri).select()
def count_dataTraining(self):
    """Return the number of usable training tweets.

    Counts the rows of ``tbl_tweet_training`` whose ``clean_text`` and
    ``sentiment_type`` are both non-NULL and returns the ``jumlah`` value of
    the first result row.
    """
    hasil = Models(
        'SELECT COUNT(id) as jumlah FROM tbl_tweet_training '
        'WHERE clean_text IS NOT NULL AND sentiment_type IS NOT NULL'
    ).select()
    baris_pertama = hasil[0]
    return baris_pertama['jumlah']
def add_dataCrawling(self):
    """Crawl tweets, or persist the tweets saved by an earlier crawl.

    The ``aksi`` form field selects the operation:

    * ``'crawling'`` - search through the ``Api`` helper using ``kata_kunci``
      (with ``' -filter:retweets'`` appended), ``tanggal_awal`` and
      ``tanggal_akhir``; hand the result to ``Excel.save_excel_crawling`` and
      return it as a JSON string under the key ``data_crawling``.
    * ``'save_crawling'`` - build tuples with ``Excel.make_tuples_crawling``
      and write them to ``tbl_tweet_crawling`` with a REPLACE statement;
      returns ``None``.

    Any other value of ``aksi`` returns ``None``.
    """
    aksi = request.form['aksi']
    # Both helpers are created up front, before the action is inspected.
    api = Api()
    excel = Excel()

    if aksi == 'crawling':
        kata_kunci = request.form['kata_kunci']
        tanggal_awal = request.form['tanggal_awal']
        tanggal_akhir = request.form['tanggal_akhir']

        kueri_pencarian = kata_kunci + ' -filter:retweets'
        hasil_crawling = api.get_search(kueri_pencarian, tanggal_awal,
                                        tanggal_akhir)
        # Persist the crawl result first so 'save_crawling' can pick it up.
        excel.save_excel_crawling(hasil_crawling)
        return json.dumps({'data_crawling': hasil_crawling})

    if aksi != 'save_crawling':
        return None

    baris_excel = excel.make_tuples_crawling()
    penyimpan = Models(
        'REPLACE INTO tbl_tweet_crawling(id, text, user, created_at) VALUES (%s, %s, %s, %s)'
    )
    penyimpan.query_sql_multiple(baris_excel)
    return None
def delete_dataStopword(self):
    """Delete the stopword whose id was submitted in the form.

    Binds the ``id`` form field to the DELETE statement on ``tbl_stopword``,
    flashes a success message and returns ``None``.
    """
    id_stopword = request.form['id']
    penghapus = Models('DELETE FROM tbl_stopword WHERE id_stopword = %s')
    penghapus.query_sql(id_stopword)
    flash('Berhasil menghapus data.', 'success')
    return None
def add_dataLabeling(self):
    """Set the sentiment label of one row in ``tbl_tweet_clean``.

    Reads ``id`` and ``value`` from the form, binds ``(value, id)`` to the
    UPDATE statement and returns a confirmation string.
    """
    id_tweet = request.form['id']
    sentimen = request.form['value']
    # Tuple order follows the placeholders: SET value first, WHERE id second.
    parameter = (sentimen, id_tweet)
    pengubah = Models(
        'UPDATE tbl_tweet_clean SET sentiment_type=%s WHERE id=%s')
    pengubah.query_sql(parameter)
    return 'Berhasil Melabeli Data!'
def delete_dataPositiveWord(self):
    """Delete the positive-lexicon entry whose id was submitted in the form.

    Binds the ``id`` form field to the DELETE statement on
    ``tbl_lexicon_positive``, flashes a success message and returns ``None``.
    """
    id_positive = request.form['id']
    penghapus = Models(
        'DELETE FROM tbl_lexicon_positive WHERE id_positive = %s')
    penghapus.query_sql(id_positive)
    flash('Berhasil menghapus data.', 'success')
    return None
def add_dataPositiveWord(self):
    """Insert a new word into ``tbl_lexicon_positive``.

    The ``kata_positif`` form field is stripped and lower-cased before it is
    bound to the INSERT statement. Flashes a success message and returns
    ``None``.
    """
    kata_baru = request.form['kata_positif'].strip().lower()
    penyimpan = Models(
        'INSERT INTO tbl_lexicon_positive(positive_word) VALUES (%s)')
    penyimpan.query_sql(kata_baru)
    flash('Berhasil menambahkan data.', 'success')
    return None
def add_dataStopword(self):
    """Insert a new stopword into ``tbl_stopword``.

    The ``stopword`` form field is stripped and lower-cased before it is
    bound to the INSERT statement. Flashes a success message and returns
    ``None``.
    """
    kata_baru = request.form['stopword'].strip().lower()
    penyimpan = Models('INSERT INTO tbl_stopword(stopword) VALUES (%s)')
    penyimpan.query_sql(kata_baru)
    flash('Berhasil menambahkan data.', 'success')
    return None
def update_dataStopword(self):
    """Replace the text of an existing stopword.

    Reads ``id`` and ``stopword`` from the form; the stopword is stripped
    and lower-cased, then ``(stopword, id)`` is bound to the UPDATE statement.
    Flashes a success message and returns ``None``.
    """
    id_stopword = request.form['id']
    kata_baru = request.form['stopword'].strip().lower()
    # Tuple order follows the placeholders: SET value first, WHERE id second.
    parameter = (kata_baru, id_stopword)
    pengubah = Models(
        'UPDATE tbl_stopword SET stopword=%s WHERE id_stopword = %s')
    pengubah.query_sql(parameter)
    flash('Berhasil mengubah data.', 'success')
    return None
def add_dataSlangword(self):
    """Insert a slang word and its original form into ``tbl_slangword``.

    Both the ``slangword`` and ``kata_asli`` form fields are stripped and
    lower-cased before being bound to the INSERT statement. Flashes a success
    message and returns ``None``.
    """
    kata_slang = request.form['slangword'].strip()
    kata_baku = request.form['kata_asli'].strip()
    parameter = (kata_slang.lower(), kata_baku.lower())
    penyimpan = Models(
        'INSERT INTO tbl_slangword(slangword, kata_asli) VALUES (%s,%s)')
    penyimpan.query_sql(parameter)
    flash('Berhasil menambahkan data.', 'success')
    return None
def update_dataPositiveWord(self):
    """Replace the text of an existing positive-lexicon entry.

    Reads ``id`` and ``kata_positif`` from the form; the word is stripped
    and lower-cased, then ``(word, id)`` is bound to the UPDATE statement.
    Flashes a success message and returns ``None``.
    """
    id_positive = request.form['id']
    kata_baru = request.form['kata_positif'].strip().lower()
    # Tuple order follows the placeholders: SET value first, WHERE id second.
    parameter = (kata_baru, id_positive)
    pengubah = Models(
        'UPDATE tbl_lexicon_positive SET positive_word=%s WHERE id_positive = %s'
    )
    pengubah.query_sql(parameter)
    flash('Berhasil mengubah data.', 'success')
    return None
def delete_dataModelling(self):
    """Delete a trained model: its ``tbl_model`` row and its JSON file.

    The model name is read from the ``id`` form field. The database row is
    removed with a parameterised DELETE, then the file of the same name under
    ``application/static/model_data/`` is removed.

    A success message is flashed when the file was removed; otherwise an
    error message is flashed and a notice is printed. Returns ``None``.
    """
    model_name = request.form['id']
    Models('DELETE FROM tbl_model WHERE model_name = %s').query_sql(model_name)

    model_path = 'application/static/model_data/' + model_name
    # The name comes straight from the request. Accept only a bare file name
    # so values such as '../x' cannot reach files outside the model directory.
    is_plain_name = os.path.basename(model_name) == model_name
    # isfile (not exists): an empty name resolves to the directory itself,
    # which exists but cannot be passed to os.remove.
    if is_plain_name and os.path.isfile(model_path):
        os.remove(model_path)
        flash(
            'Berhasil menghapus data. File (.json) model latih berhasil dihapus!',
            'success')
    else:
        flash('File (.json) model latih gagal dihapus!', 'error')
        print("\nFile tidak ditemukan!\n")
def import_fileExcelStopword(self):
    """Import stopwords from an uploaded Excel file into ``tbl_stopword``.

    The upload is taken from ``request.files['excel_file']``. When its file
    name does not end in ``.xls`` or ``.xlsx`` (case-insensitive) an error is
    flashed and nothing is imported. Returns ``None`` in both cases.
    """
    excel_file = request.files['excel_file']
    ekstensi_diterima = ('.xls', '.xlsx')

    # Guard clause: reject anything that is not an Excel workbook by name.
    if not excel_file.filename.lower().endswith(ekstensi_diterima):
        flash(
            'Format file tidak sesuai! File excel harus ber-ekstensi .xls atau .xlsx',
            'error')
        return None

    baris_excel = Excel().make_tuples_stopword(excel_file)
    penyimpan = Models('INSERT INTO tbl_stopword(stopword) VALUES (%s)')
    penyimpan.query_sql_multiple(baris_excel)
    return None
def import_fileExcelCrawling(self):
    """Import crawled tweets from an uploaded Excel file.

    The upload is taken from ``request.files['excel_file']``. When its file
    name does not end in ``.xls`` or ``.xlsx`` (case-insensitive) an error is
    flashed and nothing is imported; otherwise the tuples produced by
    ``Excel.make_tuples_crawling`` are written to ``tbl_tweet_crawling`` with
    a REPLACE statement. Returns ``None`` in both cases.
    """
    excel_file = request.files['excel_file']
    ekstensi_diterima = ('.xls', '.xlsx')

    # Guard clause: reject anything that is not an Excel workbook by name.
    if not excel_file.filename.lower().endswith(ekstensi_diterima):
        flash(
            'Format file tidak sesuai! File excel harus ber-ekstensi .xls atau .xlsx',
            'error')
        return None

    baris_excel = Excel().make_tuples_crawling(excel_file)
    penyimpan = Models(
        'REPLACE INTO tbl_tweet_crawling(id, text, user, created_at) VALUES (%s, %s, %s, %s)'
    )
    penyimpan.query_sql_multiple(baris_excel)
    return None
def count_sampleSentiment(self):
    """Return the per-class sample limit and the total sample quota.

    Counts the usable rows of ``tbl_tweet_training`` grouped by
    ``sentiment_type`` and takes the smallest group size.

    Returns:
        A tuple ``(smallest, smallest * 2)``: the upper bound of samples per
        sentiment and twice that value as the overall quota. ``(0, 0)`` when
        the query yields no groups.
    """
    rows = Models(
        'SELECT COUNT(id) as jumlah FROM tbl_tweet_training WHERE clean_text IS NOT NULL AND sentiment_type IS NOT NULL GROUP BY sentiment_type'
    ).select()
    # min(..., default=0) replaces the former 999999 sentinel, which reported
    # 0 for any class holding 999999 rows or more.
    smallest = min((row['jumlah'] for row in rows), default=0)
    return smallest, smallest * 2
def check_evaluation(self):
    """Evaluate a stored model against the labelled testing tweets.

    Reads ``nilai_k`` (converted with ``int``) and ``model_name`` from the
    form, loads the model JSON from ``application/static/model_data/``,
    vectorises the labelled rows of ``tbl_tweet_testing`` against it and
    predicts their labels with ``KNearestNeighbors``.

    Returns:
        A JSON string with the raw tweets, their clean text, their stored
        labels, the prediction result (``data_dict``) and the confusion
        matrix computed by ``self.confusion_matrix`` from the stored labels
        and ``data_dict['label_prediction']``.
    """
    nilai_k = int(request.form['nilai_k'])
    model_name = request.form['model_name']

    tweet_testing_label = Models(
        'SELECT text, clean_text, sentiment_type FROM tbl_tweet_testing WHERE sentiment_type IS NOT NULL'
    ).select()

    tweet_list = []
    teks_list = []
    label_list = []
    for tweet in tweet_testing_label:
        tweet_list.append(tweet['text'])
        teks_list.append(tweet['clean_text'])
        label_list.append(tweet['sentiment_type'])

    # NOTE(review): model_name comes from the request and is joined into the
    # path unchecked - confirm callers only submit names of stored models.
    # The context manager closes the handle that json.load(open(...)) leaked.
    with open('application/static/model_data/' + model_name) as model_file:
        model = json.load(model_file)

    # Vectorise the testing texts against the loaded training model.
    vector_list = Vectorizer(teks_list, label_list).test_vectorList(model)

    data_dict = KNearestNeighbors(nilai_k, model).predict_labelList(vector_list)

    # Compare the predictions with the labels stored in the database.
    confusion_matrix = self.confusion_matrix(label_list,
                                             data_dict['label_prediction'])

    return json.dumps({
        'tweet_database': tweet_list,
        'teks_database': teks_list,
        'sentimen_database': label_list,
        'data_dict': data_dict,
        'confusion_matrix': confusion_matrix
    })
def login(self):
    """Check the submitted credentials and open a session on success.

    Reads ``username`` and ``kata_sandi`` from the form (both stripped) and
    looks the user up in ``tbl_users``.

    Returns:
        ``True`` after storing the user's full name in
        ``session['fullname']`` when the password matches; ``False`` after
        flashing a message when the user is unknown or the password differs.
    """
    username = request.form['username'].strip()
    kata_sandi = request.form['kata_sandi'].strip()

    pencari = Models(
        'SELECT password, fullname FROM tbl_users WHERE username = %s')
    user = pencari.select_row(username)

    # Guard clause: no such user.
    if user is None:
        flash('Username tidak terdaftar.')
        return False

    # NOTE(review): the stored value is compared with the raw password wrapped
    # in a fixed prefix/suffix - no hashing happens in this method.
    sandi_dibungkus = 'textmining_' + str(kata_sandi) + '_sentimentanalysis'
    if sandi_dibungkus == list(user)[0]:
        # Only the full name is kept in the session.
        session['fullname'] = list(user)[1]
        return True

    flash('Username atau Kata Sandi tidak sesuai.')
    return False
def register(self):
    """Create a new user unless the username is already taken.

    Reads ``username``, ``kata_sandi`` and ``fullname`` from the form (all
    stripped).

    Returns:
        ``True`` after inserting the user and flashing a success message;
        ``False`` after flashing an error when the username already exists in
        ``tbl_users``.
    """
    username = request.form['username'].strip()
    kata_sandi = request.form['kata_sandi'].strip()
    fullname = request.form['fullname'].strip()

    pencari = Models(
        'SELECT password, fullname FROM tbl_users WHERE username = %s')
    user = pencari.select_row(username)

    # Guard clause: the username is already registered.
    if user is not None:
        flash('Username telah terdaftar.', 'error')
        return False

    # NOTE(review): the password is stored as the raw text wrapped in a fixed
    # prefix/suffix - no hashing happens in this method.
    sandi_dibungkus = 'textmining_' + str(kata_sandi) + '_sentimentanalysis'
    data_tambah = (username, sandi_dibungkus, fullname)

    penyimpan = Models(
        'INSERT INTO tbl_users(username, password, fullname) VALUES (%s, %s, %s)'
    )
    penyimpan.query_sql(data_tambah)
    flash('Berhasil mendaftar, silakan masuk.', 'success')
    return True
def create_dataModeling(self):
    """Build a training model from a balanced random sample and store it.

    Reads ``sample_positive`` and ``sample_negative`` from the form. When
    both denote the same number, that many random 'positif' and 'negatif'
    rows are drawn from ``tbl_tweet_training``, vectorised with
    ``Vectorizer.create_vectorList``, written as JSON under
    ``application/static/model_data/`` and registered in ``tbl_model``.

    Returns:
        A dict with ``model_name``, ``sentiment_count``,
        ``sentiment_positive``, ``sentiment_negative`` and ``data_dict`` on
        success, or ``{'error': 'Gagal Membuat Model Latih'}`` when the two
        sample sizes differ.

    Raises:
        ValueError: if either form value is not an integer literal.
    """
    sample_positive = request.form['sample_positive']
    sample_negative = request.form['sample_negative']
    # int() validates the form text; the parsed numbers (not the raw text)
    # are what gets compared and placed into the SQL below.
    jumlah_positive = int(sample_positive)
    jumlah_negative = int(sample_negative)
    jumlah_sample = jumlah_positive + jumlah_negative

    # Only a balanced sample is accepted. Comparing numerically treats
    # '5' and '05' as equal, which the former string comparison did not.
    if jumlah_positive != jumlah_negative:
        return {'error': 'Gagal Membuat Model Latih'}

    teks_list = []  # clean_text values used as training texts
    label_list = []  # sentiment_type values used as training labels
    for sentimen, jumlah in (('positif', jumlah_positive),
                             ('negatif', jumlah_negative)):
        # Both concatenated parts are trusted: a literal label and an int.
        kueri = (
            "SELECT clean_text, sentiment_type FROM tbl_tweet_training "
            "WHERE clean_text IS NOT NULL AND sentiment_type = '" + sentimen +
            "' ORDER BY RAND() LIMIT " + str(jumlah))
        for baris in Models(kueri).select():
            teks_list.append(baris['clean_text'])
            label_list.append(baris['sentiment_type'])

    data_dict = Vectorizer(teks_list, label_list).create_vectorList()

    model_name = 'sentiment_model(' + datetime.today().strftime(
        '%d-%m-%Y %H%M%S') + ').json'

    # Persist the model as JSON so evaluation and prediction can reload it.
    with open('application/static/model_data/' + model_name, 'w') as outfile:
        json.dump(data_dict, outfile, indent=4)

    data_simpan = (model_name, jumlah_sample, sample_positive,
                   sample_negative)
    Models(
        'REPLACE INTO tbl_model(model_name, sentiment_count, sentiment_positive, sentiment_negative) VALUES (%s, %s, %s, %s)'
    ).query_sql(data_simpan)

    return {
        'model_name': model_name,
        'sentiment_count': jumlah_sample,
        'sentiment_positive': sample_positive,
        'sentiment_negative': sample_negative,
        'data_dict': data_dict
    }
def select_dataModel(self):
    """Return every row of ``tbl_model`` as produced by ``Models.select()``."""
    return Models('SELECT * FROM tbl_model').select()
def select_dataWithLabel(self):
    """Return the rows of ``tbl_tweet_clean`` that already carry a label.

    A row counts as labelled when its ``sentiment_type`` is not NULL.
    """
    kueri = 'SELECT * FROM tbl_tweet_clean WHERE sentiment_type IS NOT NULL'
    return Models(kueri).select()
def add_dataLabelingKamus(self):
    """Label the unlabelled clean tweets with the positive/negative lexicons.

    Runs only when the ``aksi`` form field equals ``'labelingKamus'``;
    otherwise returns ``None``.

    Each whitespace-separated token of a tweet's ``clean_text`` adds 1 to the
    score when it is in the positive lexicon and subtracts 1 when it is in the
    negative lexicon. A score above zero is labelled ``'positif'``, below zero
    ``'negatif'``; tweets scoring exactly zero are only counted and stay
    unlabelled. The labels are written back with one batched UPDATE.

    Returns:
        A JSON string with, for every labelled tweet, its text, positive and
        negative token counts and score, plus ``jumlah_netral`` (the number of
        tweets that scored zero).
    """
    aksi = request.form['aksi']
    if aksi != 'labelingKamus':
        return None

    data_noLabel = Models(
        'SELECT id, clean_text FROM tbl_tweet_clean WHERE sentiment_type IS NULL'
    ).select()

    # Sets give constant-time membership tests instead of scanning the whole
    # lexicon once per token.
    kamus_positive = {
        baris['positive_word']
        for baris in Models(
            'SELECT positive_word FROM tbl_lexicon_positive').select()
    }
    kamus_negative = {
        baris['negative_word']
        for baris in Models(
            'SELECT negative_word FROM tbl_lexicon_negative').select()
    }

    # Parallel lists returned to the caller for display.
    teks_data = []
    skor_data = []
    total_positive = []
    total_negative = []
    jumlah_netral = 0  # tweets whose score is exactly zero
    data_ubah = []  # (sentiment, id) tuples for the batched UPDATE

    print('\n-- PROSES ' + str(len(data_noLabel)) + ' DATA --')

    for index, data_nL in enumerate(data_noLabel):
        # A NULL clean_text is treated as an empty text (score zero) rather
        # than crashing on None.split().
        tokens = (data_nL['clean_text'] or '').split()
        # A token present in both lexicons counts once for each side.
        count_positive = sum(1 for token in tokens if token in kamus_positive)
        count_negative = sum(1 for token in tokens if token in kamus_negative)
        skor = count_positive - count_negative

        if skor == 0:
            jumlah_netral += 1
            continue
        sentimen = 'positif' if skor > 0 else 'negatif'

        # The former try/except around these appends was removed: its handler
        # referenced an undefined name and would itself have raised.
        data_ubah.append((sentimen, data_nL['id']))
        teks_data.append(data_nL['clean_text'])
        skor_data.append(skor)
        total_positive.append(count_positive)
        total_negative.append(count_negative)

        print(index + 1)  # progress output, labelled tweets only

    Models('UPDATE tbl_tweet_clean SET sentiment_type=%s WHERE id = %s'
           ).query_sql_multiple(data_ubah)

    print('\n-- SELESAI --\n')

    return json.dumps({
        'teks_data': teks_data,
        'total_positive': total_positive,
        'total_negative': total_negative,
        'skor_data': skor_data,
        'jumlah_netral': jumlah_netral
    })
def delete_allDataCrawling(self):
    """Run an unfiltered DELETE on ``tbl_tweet_crawling``; returns ``None``."""
    Models('DELETE FROM tbl_tweet_crawling').query_deleteAll()
    return None
def delete_allDataLabeling(self):
    """Delete every labelled row of ``tbl_tweet_clean``; returns ``None``.

    A row counts as labelled when its ``sentiment_type`` is not NULL.
    """
    kueri = 'DELETE FROM tbl_tweet_clean WHERE sentiment_type IS NOT NULL'
    Models(kueri).query_deleteAll()
    return None
def count_dataNoLabel(self):
    """Return the number of rows in ``tbl_tweet_clean`` without a label.

    Reads the ``jumlah`` value from the first row of the COUNT query.
    """
    hasil = Models(
        'SELECT COUNT(id) as jumlah FROM tbl_tweet_clean WHERE sentiment_type IS NULL'
    ).select()
    baris_pertama = hasil[0]
    return baris_pertama['jumlah']
def get_visualisasiHasil(self):
    """Render the result charts and return the summary figures.

    From the labelled rows of ``tbl_tweet_clean`` this writes four PNG files
    whose names embed the current time (``%d-%m-%Y %H%M``):

    * a histogram of tweet dates and a pie chart of the positive/negative
      share under ``application/static/matplotlib/``;
    * one word cloud per sentiment under ``application/static/wordcloud/``.

    Returns:
        A dict with the tweet count, the positive/negative counts and
        percentages (rounded to two decimals), the 15 most frequent words per
        sentiment as ``(word, count)`` pairs and the timestamp used in the
        file names. When anything fails while rendering, the histogram and pie
        files are removed if present and ``{'error': 'Terjadi Kesalahan!'}``
        is returned.
    """
    # Function-scope import: only this method needs Counter.
    from collections import Counter

    waktu_sekarang = datetime.today().strftime('%d-%m-%Y %H%M')
    path_hist = ('application/static/matplotlib/hist_distribusi_waktu(' +
                 waktu_sekarang + ').png')
    path_pie = ('application/static/matplotlib/pie_sentiment(' +
                waktu_sekarang + ').png')
    path_wc_positive = (
        'application/static/wordcloud/wordcloud_visualisasiPositive(' +
        waktu_sekarang + ').png')
    path_wc_negative = (
        'application/static/wordcloud/wordcloud_visualisasiNegative(' +
        waktu_sekarang + ').png')

    try:
        # --- Histogram of tweet dates ---
        data_distribusi_waktuTweet = Models(
            'SELECT DATE(created_at) as tanggal FROM tbl_tweet_clean WHERE clean_text IS NOT NULL AND sentiment_type IS NOT NULL'
        ).select()
        list_tanggal = [
            str(data['tanggal']) for data in data_distribusi_waktuTweet
        ]

        figure, _ = plt.subplots(figsize=(25, 10))
        try:
            plt.hist(list_tanggal, bins=125)
            plt.ylabel('Jumlah Tweet', fontsize=18)
            plt.xlabel('Tanggal Perolehan', fontsize=18)
            plt.xticks(rotation=45)
            plt.grid()
            plt.savefig(path_hist)
        finally:
            # Close the figure: cla()/clf() only clear it, so every call
            # used to leave one more figure alive in pyplot.
            plt.close(figure)

        # --- Pie chart of the sentiment share ---
        data_sentimentPositif = Models(
            "SELECT COUNT(id) as jumlah FROM tbl_tweet_clean WHERE clean_text IS NOT NULL AND sentiment_type = 'positif'"
        ).select()
        data_sentimentNegatif = Models(
            "SELECT COUNT(id) as jumlah FROM tbl_tweet_clean WHERE clean_text IS NOT NULL AND sentiment_type = 'negatif'"
        ).select()
        data_P = int(data_sentimentPositif[0]['jumlah'])
        data_N = int(data_sentimentNegatif[0]['jumlah'])

        # With no labelled tweets this division raises ZeroDivisionError,
        # which the handler below turns into the error result.
        jumlah_data = data_P + data_N
        persentase_P = round((data_P / jumlah_data) * 100, 2)
        persentase_N = round((data_N / jumlah_data) * 100, 2)

        figure, _ = plt.subplots(figsize=(10, 10))
        try:
            plt.pie([persentase_P, persentase_N],
                    labels=[
                        'Positif (' + str(persentase_P) + ' %)',
                        'Negatif (' + str(persentase_N) + ' %)'
                    ],
                    colors=['#00c853', '#ff1744'],
                    startangle=90)
            plt.legend(title=" Tipe Sentimen ")
            plt.savefig(path_pie)
        finally:
            plt.close(figure)

        # --- Word clouds per sentiment ---
        data_sentimentPositif = Models(
            "SELECT clean_text FROM tbl_tweet_clean WHERE clean_text IS NOT NULL AND sentiment_type = 'positif'"
        ).select()
        data_sentimentNegatif = Models(
            "SELECT clean_text FROM tbl_tweet_clean WHERE clean_text IS NOT NULL AND sentiment_type = 'negatif'"
        ).select()

        # Each text is followed by a single space, as before.
        string_dataPositif = "".join(
            str(data['clean_text']) + " " for data in data_sentimentPositif)
        string_dataNegatif = "".join(
            str(data['clean_text']) + " " for data in data_sentimentNegatif)

        for teks, path in ((string_dataPositif, path_wc_positive),
                           (string_dataNegatif, path_wc_negative)):
            wordcloud = WordCloud(width=800,
                                  height=400,
                                  background_color='black',
                                  collocations=False).generate(teks)
            wordcloud.to_file(path)
    except Exception as error:
        # Report the cause instead of swallowing it silently, then remove
        # the chart files that may already have been written.
        print('\nGagal membuat visualisasi: ' + str(error) + '\n')
        for path in (path_hist, path_pie):
            if os.path.exists(path):
                os.remove(path)
            else:
                print("\nFile tidak ditemukan!\n")
        return {'error': 'Terjadi Kesalahan!'}

    # Word frequencies, highest first; ties keep first-seen order.
    frekuensi_P = Counter(string_dataPositif.split()).most_common(15)
    frekuensi_N = Counter(string_dataNegatif.split()).most_common(15)

    return {
        'jumlah_tweets': len(list_tanggal),
        'jumlah_p': data_P,
        'jumlah_n': data_N,
        'persentase_p': persentase_P,
        'persentase_n': persentase_N,
        'frekuensi_p': frekuensi_P,
        'frekuensi_n': frekuensi_N,
        'waktu': waktu_sekarang
    }
def select_dataStopword(self):
    """Return every row of ``tbl_stopword`` as produced by ``Models.select()``."""
    return Models('SELECT * FROM tbl_stopword').select()
def delete_allDataStopWord(self):
    """Run an unfiltered DELETE on ``tbl_stopword``; returns ``None``."""
    Models('DELETE FROM tbl_stopword').query_deleteAll()
    return None
def select_dataNoLabel(self):
    """Return the rows of ``tbl_tweet_clean`` that have no label yet.

    Selects ``id``, ``text`` and ``clean_text`` for rows whose
    ``sentiment_type`` is NULL.
    """
    kueri = 'SELECT id, text, clean_text FROM tbl_tweet_clean WHERE sentiment_type IS NULL'
    return Models(kueri).select()
def select_dataCrawling(self):
    """Return every row of ``tbl_tweet_crawling`` as produced by ``Models.select()``."""
    return Models('SELECT * FROM tbl_tweet_crawling').select()