import my_function as func


def _write_silhouette(sheet, scores):
    """Write (cluster count, average silhouette score) pairs to *sheet*.

    Column 1 holds the number of clusters k (starting at k=2, matching the
    original ``i + 2`` offset), column 2 the average silhouette score for
    that k. Rows are 1-based as openpyxl requires.
    """
    for row, score in enumerate(scores, start=1):
        sheet.cell(row=row, column=1).value = row + 1  # k starts at 2
        sheet.cell(row=row, column=2).value = score


if __name__ == "__main__":
    woorkbook = func.create_excel()

    # Uni-gram TF-IDF matrix: cluster and record silhouette scores.
    tfidf = func.load_list("tf_idf")
    woorkbook.create_sheet('Silhouette')
    s_avg = func.clustering(tfidf)
    func.save_list("s_avg", s_avg)
    _write_silhouette(woorkbook['Silhouette'], s_avg)

    # Bi-gram TF-IDF matrix: same procedure on its own sheet.
    tfidf = func.load_list("tf_idf_2")
    woorkbook.create_sheet('Silhouette_2')
    s_avg = func.clustering(tfidf)
    func.save_list("s_avg_2", s_avg)
    _write_silhouette(woorkbook['Silhouette_2'], s_avg)

    func.save_excel('Clustering', woorkbook)
import my_function as func

if __name__== "__main__":
    # Uni Gram: compute TF, IDF and TF-IDF from the stemmed corpus and the
    # reduced feature list, persisting each stage both as a pickle-style
    # list (save_list) and as an Excel workbook (save_excel).
    stemming = func.load_list("stemming")
    collecting_fiture = func.load_list("new_collecting_fiture")
    woorkbook = func.create_excel()

    print("Load Tf....")
    # Term frequency; func.tf also writes its result into sheet 'tf'.
    tf_a = func.tf(stemming,collecting_fiture,woorkbook,'tf')
    func.save_list("tf",tf_a)
    func.save_excel('tf',woorkbook)

    # A fresh workbook per stage so each output file holds one sheet.
    woorkbook = func.create_excel()
    print("Load idf...")
    # Inverse document frequency, derived from the TF table.
    idf_a = func.idf(tf_a)
    func.add_sheet('idf', idf_a,woorkbook)
    func.save_list("idf",idf_a)
    func.save_excel('idf',woorkbook)

    woorkbook = func.create_excel()
    print("Load tf_idf...")
    # TF-IDF weighting over the feature set.
    tf_idf_a = func.tf_idf(tf_a,idf_a,collecting_fiture,woorkbook)
    func.save_list("tf_idf",tf_idf_a)
    func.save_excel('tf_idf',woorkbook)

    # Bi Gram: load bi-gram tokens for the same pipeline.
    # NOTE(review): the source appears truncated here — the bi-gram TF/IDF
    # stages that presumably follow are not visible in this chunk.
    n_gram = func.load_list("token_2")
import my_function as func

# Tokens stripped from every document: artifacts of the crawl source
# ("suara", "com") and the bytes-literal prefix "b".
NOISE_TOKENS = frozenset({"b", "suara", "com"})


def _remove_noise_tokens(docs):
    """Return *docs* with every noise token dropped from each document.

    Single pass per document (the original filtered each document three
    times, once per noise token); output order is preserved.
    """
    return [[tok for tok in doc if tok not in NOISE_TOKENS] for doc in docs]


if __name__ == "__main__":
    # Uni Gram preprocessing pipeline: lowercase -> strip symbols ->
    # tokenize -> drop crawl artifacts -> remove stopwords. Each stage is
    # persisted as a list and mirrored into an Excel sheet.
    print("Load data......")
    data = func.load_list("data")
    woorkbook = func.create_excel()

    print("Lowercase......")
    lowercase = func.lowercase(data)
    func.add_sheet('lowercase', lowercase, woorkbook)
    func.save_list("lowercase", lowercase)

    print("Remove symbol......")
    symbol_remover = func.symbol_remover(lowercase)
    func.add_sheet('remove symbol', symbol_remover, woorkbook)
    func.save_list("symbol", symbol_remover)

    print("Tokenisasi......")
    tokenisasi = func.tokenisasi(symbol_remover)
    tokenisasi = _remove_noise_tokens(tokenisasi)
    func.add_sheet_list('tokenisasi', tokenisasi, woorkbook)
    func.save_list("token", tokenisasi)

    print("Stopword......")
    stopword_s = func.stopword_s(tokenisasi)
    func.add_sheet_list('stopwords', stopword_s, woorkbook)
    func.save_list("stopword", stopword_s)
# NOTE(review): this file was flattened to one line and the chunk starts
# INSIDE a crawler function (presumably getLinks(), called below) whose
# `def` line, loop header, and the initialisations of `nav`, `depth`,
# `count_depth` and `cek` are not visible here. The indentation below is a
# best-effort reconstruction — confirm against the original source.
nav += 1
if depth > count_depth:
    # Crawl depth budget exhausted: flag the (unseen) outer loop to stop.
    cek = False
else:
    url = listUrl[nav]
    req = requests.get(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Anchors with CSS class 'ellipsis3' — presumably the article links on
    # suara.com index pages; verify against the site markup.
    news_links = soup.find_all('a', {'class': 'ellipsis3'}, href=True)
    node_A = node.index(url)  # graph id of the page currently being expanded
    for link in news_links:
        # Every discovered href is queued for crawling, duplicates included.
        listUrl.append(link['href'])
        if link['href'] not in node:
            # First time we see this URL: register it as a graph node and
            # record a directed edge (current page -> linked page) by index.
            node.append(link['href'])
            node_B = node.index(link['href'])
            value = (str(node_A), str(node_B))
            edge.append(value)
# NOTE(review): the flattened source contains a second `nav += 1` here; its
# original nesting level is ambiguous — placed at the fragment's outer level
# as the most plausible reading. Verify.
nav += 1

if __name__ == "__main__":
    # Shared crawl state, mutated in place by getLinks() (defined outside
    # this chunk): crawl queue, discovered URLs, and link-graph edges.
    listUrl = []
    node = []
    edge = []
    links = getLinks()
    func.save_list("node", node)
    func.save_list("edge", edge)
    func.save_list("listUrl", listUrl)
import my_function as func


def _reduce_features(vsm_key, fiture_key, out_vsm_key, out_fiture_key):
    """Drop highly correlated features from one vector-space model.

    Loads the VSM and its feature list under *vsm_key* / *fiture_key*,
    removes highly correlated features via func.drop_highly_correlation,
    and persists the reduced pair under the two output keys.
    """
    vsm = func.load_list(vsm_key)
    collecting_fiture = func.load_list(fiture_key)
    new_vsm, new_collecting_fiture = func.drop_highly_correlation(
        vsm, collecting_fiture)
    func.save_list(out_vsm_key, new_vsm)
    func.save_list(out_fiture_key, new_collecting_fiture)


if __name__ == "__main__":
    # Uni-gram feature space.
    _reduce_features("vsm", "collecting_fiture",
                     "new_vsm", "new_collecting_fiture")
    # Bi-gram feature space.
    _reduce_features("vsm_2", "collecting_fiture_2",
                     "new_vsm_2", "new_collecting_fiture_2")
import my_function as func

if __name__ == "__main__":
    # Crawl the suara.com "terkini" index, then persist the result twice:
    # raw page contents via save_content and the crawl list via save_list.
    seed_url = 'https://www.suara.com/indeks/terkini/all/2019'
    pages = func.crawl_web(seed_url)
    func.save_content(pages)
    func.save_list("crawl", pages)