import my_function as func

def _export_silhouette(workbook, sheet_title, features, list_name):
    """Cluster *features* and record the resulting scores in *workbook*.

    Creates a sheet named *sheet_title*, runs ``func.clustering`` on
    *features*, persists the result with ``func.save_list`` under
    *list_name*, and writes one row per score: column 1 holds the score's
    position + 2 and column 2 holds the score itself.
    """
    workbook.create_sheet(sheet_title)
    sheet = workbook[sheet_title]

    scores = func.clustering(features)
    func.save_list(list_name, scores)

    # Rows are 1-based. The label in column 1 is ``row + 1`` (i.e. the
    # 0-based position + 2) -- presumably the number of clusters tried,
    # starting at 2; confirm against func.clustering.
    for row, score in enumerate(scores, start=1):
        sheet.cell(row=row, column=1).value = row + 1
        sheet.cell(row=row, column=2).value = score


if __name__ == "__main__":
    # Both feature sets go through the same steps and end up as two sheets
    # of a single workbook saved as 'Clustering'.
    tfidf = func.load_list("tf_idf")
    woorkbook = func.create_excel()
    _export_silhouette(woorkbook, 'Silhouette', tfidf, "s_avg")

    tfidf = func.load_list("tf_idf_2")
    _export_silhouette(woorkbook, 'Silhouette_2', tfidf, "s_avg_2")

    func.save_excel('Clustering', woorkbook)
import my_function as func

if __name__ == "__main__":

    # Uni Gram

    # Inputs saved by earlier pipeline steps.
    stemming = func.load_list("stemming")
    collecting_fiture = func.load_list("new_collecting_fiture")

    # Stage 1: tf -- func.tf receives the workbook and the sheet name 'tf'.
    woorkbook = func.create_excel()
    print("Load Tf....")
    tf_a = func.tf(stemming, collecting_fiture, woorkbook, "tf")
    func.save_list("tf", tf_a)
    func.save_excel("tf", woorkbook)

    # Stage 2: idf -- computed from the tf result, then added to a fresh
    # workbook as sheet 'idf'.
    woorkbook = func.create_excel()
    print("Load idf...")
    idf_a = func.idf(tf_a)
    func.add_sheet("idf", idf_a, woorkbook)
    func.save_list("idf", idf_a)
    func.save_excel("idf", woorkbook)

    # Stage 3: tf_idf -- combines the two previous results; func.tf_idf is
    # handed the workbook directly.
    woorkbook = func.create_excel()
    print("Load tf_idf...")
    tf_idf_a = func.tf_idf(tf_a, idf_a, collecting_fiture, woorkbook)
    func.save_list("tf_idf", tf_idf_a)
    func.save_excel("tf_idf", woorkbook)

    # Bi Gram

    # NOTE(review): n_gram is not used in the visible part of this script;
    # the rest of the Bi Gram section appears to be cut off here.
    n_gram = func.load_list("token_2")
import my_function as func

if __name__ == "__main__":
    # Uni Gram
    # Each stage below stores its result both as a sheet of `woorkbook`
    # and through func.save_list. The workbook is not saved in the visible
    # part of this script.
    print("Load data......")
    data = func.load_list("data")
    woorkbook = func.create_excel()

    print("Lowercase......")
    lowercase = func.lowercase(data)
    func.add_sheet('lowercase', lowercase, woorkbook)
    func.save_list("lowercase", lowercase)

    print("Remove symbol......")
    symbol_remover = func.symbol_remover(lowercase)
    func.add_sheet('remove symbol', symbol_remover, woorkbook)
    func.save_list("symbol", symbol_remover)

    print("Tokenisasi......")
    tokenisasi = func.tokenisasi(symbol_remover)
    # Tokens dropped from every document -- presumably residue of the
    # source site ("suara", "com") and of a bytes prefix ("b"); confirm.
    unwanted_tokens = ("b", "suara", "com")
    # Filter each token list in a single pass, replacing it in place so
    # `tokenisasi` stays the same outer list object.
    for i, tokens in enumerate(tokenisasi):
        tokenisasi[i] = [x for x in tokens if x not in unwanted_tokens]
    func.add_sheet_list('tokenisasi', tokenisasi, woorkbook)
    func.save_list("token", tokenisasi)

    print("Stopword......")
    stopword_s = func.stopword_s(tokenisasi)
    func.add_sheet_list('stopwords', stopword_s, woorkbook)
    func.save_list("stopword", stopword_s)
            nav += 1

        if depth > count_depth:
            cek = False

        else:
            url = listUrl[nav]
            req = requests.get(url)
            soup = BeautifulSoup(req.text, 'html.parser')
            news_links = soup.find_all('a', {'class': 'ellipsis3'}, href=True)
            node_A = node.index(url)
            for link in news_links:
                listUrl.append(link['href'])
                if link['href'] not in node:
                    node.append(link['href'])
                node_B = node.index(link['href'])
                value = (str(node_A), str(node_B))
                edge.append(value)
            nav += 1


if __name__ == "__main__":
    # Module-level accumulators. The crawl loop visible above appends to
    # these exact names, so they must exist before getLinks() runs.
    listUrl, node, edge = [], [], []

    # NOTE(review): the return value is not used in the visible code.
    links = getLinks()

    # Persist the three accumulators under their own names.
    for label, values in (("node", node), ("edge", edge), ("listUrl", listUrl)):
        func.save_list(label, values)
import my_function as func

if __name__ == "__main__":
    # Each entry names: (vsm input, feature input, vsm output, feature output).
    # The first row is the base set, the second the "_2" (n-gram) set.
    jobs = (
        ("vsm", "collecting_fiture",
         "new_vsm", "new_collecting_fiture"),
        ("vsm_2", "collecting_fiture_2",
         "new_vsm_2", "new_collecting_fiture_2"),
    )

    for vsm_name, feature_name, vsm_out, feature_out in jobs:
        matrix = func.load_list(vsm_name)
        features = func.load_list(feature_name)
        # func.drop_highly_correlation returns the reduced pair.
        reduced_matrix, reduced_features = func.drop_highly_correlation(
            matrix, features)
        func.save_list(vsm_out, reduced_matrix)
        func.save_list(feature_out, reduced_features)
# Example no. 6
# 0
import my_function as func

if __name__ == "__main__":
    # Index page handed to the crawler.
    start_url = 'https://www.suara.com/indeks/terkini/all/2019'

    crawl = func.crawl_web(start_url)

    # The crawl result is stored twice: once through func.save_content and
    # once as the saved list "crawl".
    func.save_content(crawl)
    func.save_list("crawl", crawl)