コード例 #1
0
def get_sentimental(model_name_input):
    """Print sentiment totals and video/reply counts for one model.

    Sums the positive and the negative ``SENTIMENTAL`` values over every
    reply table that ``MEDIA_INFO`` lists for the model, then prints the
    model name, the positive total, the negative total, the number of
    ``MEDIA_INFO`` rows and the summed ``NUM_OF_REPLY``.

    Args:
        model_name_input: Value matched against ``MEDIA_INFO.MODEL_NAME``.

    Returns:
        None. The figures are only printed.
    """
    model_name = model_name_input
    dbc = DBcontroller()
    good = 0
    bad = 0

    # NOTE(review): every query below is assembled with str.format, so
    # model_name and the stored table names must come from a trusted source.
    reply_ref_tables = dbc.execQuery(
        "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME='{}'".format(
            model_name))

    for table_row in reply_ref_tables:
        sentimental_rows = dbc.execQuery(
            "select SENTIMENTAL from {}".format(table_row[0]))
        for sentimental_row in sentimental_rows:
            sentimental = sentimental_row[0]
            if sentimental is None:
                # Rows without a score contribute to neither total.
                continue
            if sentimental < 0:
                bad += sentimental
            elif sentimental > 0:
                good += sentimental

    model_infos = dbc.execQuery(
        "select MODEL_NAME, COUNT(MODEL_NAME), SUM(NUM_OF_REPLY) from MEDIA_INFO where MODEL_NAME='{}' GROUP BY MODEL_NAME"
        .format(model_name))

    # GROUP BY yields no row at all for an unknown model; report zeros
    # instead of failing on model_infos[0].
    if model_infos:
        num_of_video = model_infos[0][1]
        num_of_reply = model_infos[0][2]
    else:
        num_of_video = 0
        num_of_reply = 0
    print(model_name, good, bad, num_of_video, num_of_reply)


# dbc = DBcontroller()
# for idx in range(1303, 1312):
#     table_name = 'ap_p_ipxs_'+str(idx)
#     dbc.execQuery("delete from MEDIA_INFO where REPLY_REF_TABLE like '{}'".format(table_name))
コード例 #2
0
def comment_proto():
    """Write every stored comment of one hard-coded model to ``./a.txt``.

    Looks up the reply tables that ``MEDIA_INFO`` lists for the model, writes
    the ``CONTENT`` value of each row followed by a separator line, and
    finally prints how many videos and comments were processed.

    Returns:
        None.
    """
    modelname = '아이폰 XS'
    output = 'a.txt'
    separator = (
        "\n============================================================================================\n"
    )

    commend_count = 0
    # The context manager closes the file even when a query or write fails.
    with open("./" + output, "w", encoding='utf-8') as f:
        dbc = DBcontroller()
        result = dbc.execQuery(
            "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME='{}'".format(
                modelname))
        video_count = len(result)
        for val in result:
            now = dbc.execQuery("select CONTENT from {}".format(val[0]))
            commend_count += len(now)
            for b in now:
                if len(b) == 0:
                    # Empty rows are counted above but have nothing to write.
                    continue
                f.write(b[0])
                f.write(separator)

    print("모델 : ", modelname)
    print(video_count, "개의 동영상의", commend_count, "개의 댓글을 ", output, "에 저장하였음")
コード例 #3
0
def whole_comment_to_txt_file():
    """Write the comments of every reply table to ``result.txt``.

    Each ``CONTENT`` value is written on its own line with embedded newlines
    replaced by spaces. After each table the running count is printed.

    Returns:
        None.
    """
    dbc = DBcontroller()
    count_ = 0

    # The context manager closes the file even when a query or write fails.
    with open("result.txt", 'w', encoding='utf-8') as f:
        result = dbc.execQuery("select REPLY_REF_TABLE from MEDIA_INFO")
        comment_table_list = [table_row[0] for table_row in result]

        for table in comment_table_list:
            result = dbc.execQuery("select CONTENT from {}".format(table))
            for a in result:
                target_comment = a[0].replace("\n", " ")
                f.write(target_comment)
                f.write("\n")
                # The counter advances by the row's length; the query selects
                # a single column, so this presumably amounts to one per
                # comment (verify against DBcontroller's row format).
                count_ += len(a)
            print(count_)
コード例 #4
0
def insert_sentimental_summery():
    """Fill the per-model "good" and "bad" sentiment summary tables.

    For every row of ``MODEL_INFO`` this:

    1. stores the names ``<MODEL_CODE>_SENTIMENTAL_GOOD`` and
       ``<MODEL_CODE>_SENTIMENTAL_BAD`` in the row's ``GOOD`` / ``BAD``
       columns,
    2. calls ``make_sentimental_table`` for both names, and
    3. copies rows with ``SENTIMENTAL > 3`` into the good table and rows with
       ``SENTIMENTAL < -3`` into the bad table from each of the model's reply
       tables.

    Returns:
        None.
    """
    dbc = DBcontroller()
    model_info = dbc.execQuery("select MODEL_NAME, MODEL_CODE from MODEL_INFO")
    for model_row in model_info:
        model_name = model_row[0]
        model_code = model_row[1]
        good_table_name = model_code + "_SENTIMENTAL_GOOD"
        bad_table_name = model_code + "_SENTIMENTAL_BAD"

        # NOTE(review): statements are assembled with str.format; the names
        # come straight from the database and are not escaped.
        dbc.execQuery(
            "update MODEL_INFO set GOOD='{}' where MODEL_NAME='{}'".format(
                good_table_name, model_name))
        dbc.execQuery(
            "update MODEL_INFO set BAD='{}' where MODEL_NAME='{}'".format(
                bad_table_name, model_name))
        reply_ref_tables = dbc.execQuery(
            "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME='{}'".
            format(model_name))
        make_sentimental_table(good_table_name, dbc)
        make_sentimental_table(bad_table_name, dbc)

        for reply_row in reply_ref_tables:
            reply_table_name = reply_row[0]
            dbc.execQuery(
                "insert into {} (select * from {} where SENTIMENTAL > 3)".
                format(good_table_name, reply_table_name))
            dbc.execQuery(
                "insert into {} (select * from {} where SENTIMENTAL < -3)".
                format(bad_table_name, reply_table_name))
コード例 #5
0
def get_comment_by_model(file=False):
    """Collect all stored comments grouped by model name.

    Args:
        file: When truthy, additionally write each model's comments to
            ``<MODEL_NAME>.txt`` (one comment per entry, followed by a
            separator line) and print the model name.

    Returns:
        A ``zip`` iterator of ``(model_name, comments)`` pairs, where
        ``comments`` is the list of ``CONTENT`` values gathered from every
        reply table of that model.
    """
    dbc = DBcontroller()

    models = dbc.execQuery("select distinct MODEL_NAME from MEDIA_INFO")
    model_list = [model_row[0] for model_row in models]

    model_comment_array = []
    for model_name in model_list:
        table_names = dbc.execQuery(
            "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME='{}'".
            format(model_name))
        model_comments = []
        for table_row in table_names:
            comments = dbc.execQuery(
                "select CONTENT from {}".format(table_row[0]))
            model_comments.extend(comment[0] for comment in comments)
        model_comment_array.append(model_comments)

    if file:
        for model_name, comments in zip(model_list, model_comment_array):
            # The context manager closes each per-model file.
            with open(model_name + ".txt", 'w', encoding='utf-8') as f:
                print(model_name)
                for comment in comments:
                    f.write(comment)
                    f.write("\n==============\n")

    # A fresh iterator is built here: a zip object can be consumed only once,
    # so reusing the one iterated for the files would hand callers an
    # already-exhausted result.
    return zip(model_list, model_comment_array)
コード例 #6
0
 def __init__(self):
     """Create the DB handle, the analyzer and the product name tables."""
     # Database access object; used through self.dbc elsewhere in the class.
     self.dbc = DBcontroller()
     # Text analyzer instance (presumably a MeCab wrapper, judging by the
     # attribute name -- confirm against the `analysis` class).
     self.mecab = analysis()
     # Example call kept by the original author: self.mecab.getnouns(comment)
     # Human-readable product names (nine entries).
     self.product_name = [
         "구글홈", "아이폰 XS", "갤럭시 S9", "엘지 G7", "엘지 그램 15 2018",
         "삼성 노트북 9 always", "갤럭시탭 S4", "아이패드 6세대", "아이패드 프로 3세대"
     ]
     # Short codes (nine entries); presumably index-aligned with
     # product_name above -- confirm against the code that reads both lists.
     self.product_name_DB_version = [
         "go_s_home", "ap_p_ipxs", "ss_p_s9", "lg_p_g7", "lg_n_gram15",
         "ss_n_alwy9", "ss_t_galtap4", "ap_t_ipd6", "ap_t_pro3"
     ]
コード例 #7
0
class log:
    """Records the schema-wide row count once per crawl date.

    ``count_log`` reads ``SUM(TABLE_ROWS)`` for the ``kbrs_db`` schema from
    ``INFORMATION_SCHEMA.TABLES`` and stores it in the ``LOG_INFO`` table
    under today's date, inserting a new row or updating the existing one.
    """

    def __init__(self):
        """Capture today's date and set up the DB handle and query text."""
        self.CRAWLED_DATE = datetime.datetime.today().strftime("%Y-%m-%d")
        self.dbc = DBcontroller()
        self.log_table_name = "LOG_INFO"
        self.WHOLE_DATA_COUNT_QUERY = "SELECT SUM(TABLE_ROWS) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='kbrs_db'"

    def count_log(self):
        """Insert or update today's total row count in the log table.

        Returns:
            None.
        """
        result = self.dbc.execQuery(self.WHOLE_DATA_COUNT_QUERY)
        whole_data_count = result[0][0] if result else None
        if whole_data_count is None:
            # SUM() is NULL when no table matches; formatting None into the
            # statement would produce invalid SQL, so log a count of 0.
            whole_data_count = 0

        check_select_query = "select * from {} where CRAWLED_DATE='{}'".format(
            self.log_table_name, self.CRAWLED_DATE)
        check_result = self.dbc.execQuery(check_select_query)

        if not check_result:
            # No entry for today yet.
            statement = "INSERT INTO {} values({}, '{}')".format(
                self.log_table_name, whole_data_count, self.CRAWLED_DATE)
        else:
            # Today's entry exists: refresh its count.
            statement = "update {} set WHOLE_DATA_COUNT={} where CRAWLED_DATE='{}'".format(
                self.log_table_name, whole_data_count, self.CRAWLED_DATE)
        self.dbc.execQuery(statement)
コード例 #8
0
from crawling import crawling
from DBcontroller import DBcontroller
import datetime

if __name__ == '__main__':
    # Crawl the videos of every model registered in MODEL_INFO.
    CRAWLED_DATE = datetime.datetime.today().strftime("%Y-%m-%d")

    dbc = DBcontroller()
    model_name = dbc.execQuery("select MODEL_NAME from MODEL_INFO")
    filter_word = dbc.execQuery("select FILTER_WORD from MODEL_INFO")

    # Pair each model name row with its filter-word row.
    MODEL = zip(model_name, filter_word)

    for MODEL_NAME, FILT in MODEL:
        # A FILTER_WORD value may hold several comma-separated words.
        FILT = FILT[0].split(',') if ',' in FILT[0] else [FILT[0]]

        # Pass the product name as `word` to the crawling class and start.
        crawler = crawling(word=MODEL_NAME[0], filt=FILT)
        video_urls = crawler.geturl()
        total = len(video_urls)
        for position, VIDEO_URL in enumerate(video_urls, 1):
            print(VIDEO_URL)
            print("%d/%d 번째 동영상 크롤링 중" % (position, total))
            confirm = crawler.getvideo(VIDEO_URL)
            if confirm < 0:
                continue
コード例 #9
0
from DBcontroller import DBcontroller
from KnuSentiLex.knusl import KnuSL
from hanspell import spell_checker
from morpheme import analysis
import re

if __name__ == '__main__':
    nscharacter = re.compile('[^a-zA-Z0-9`~@#$%^&*()-=_+{}\[\],/<>;\'":|\\\]+')
    corpos = analysis()
    ksl = KnuSL
    dbc = DBcontroller()

    product_name = dbc.execQuery("select DISTINCT MODEL_NAME from MEDIA_INFO")
    for product in product_name:
        COMMENT_TABLE = dbc.execQuery(
            "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME = '{}'".
            format(product[0]))
        for TABLE in COMMENT_TABLE:
            COMMENT = dbc.execQuery("select CONTENT from {}".format(TABLE[0]))
            #COMMENT = dbc.execQuery("select CONTENT from {} where SENTIMENTAL='NULL'".format(TABLE[0]))
            for TEXT in COMMENT:
                score = 0
                neg = 0
                pos = 0

                REPLACE_TEXT = TEXT[0].replace('\\',
                                               '\\\\').replace('"', '\\"')
                try:
                    check = spell_checker.check(REPLACE_TEXT)
                    reduction = "".join(nscharacter.findall(check.checked))
                    #words = corpos.mecabpos(reduction)
コード例 #10
0
from DBcontroller import DBcontroller
from analysis import analysis

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time

# Module-level database handle; it is constructed as soon as this module is
# imported.
dbc = DBcontroller()


class demo:
    def __init__(self, url):
        """Remember the target URL and prepare the Chrome options.

        Args:
            url: Address that ``test`` loads in the browser.
        """
        self.url = url
        self.options = webdriver.ChromeOptions()
        # Headless browser, Korean locale, fixed window size.
        for chrome_flag in ('--headless', 'lang=ko_KR',
                            '--window-size=1920,1080'):
            self.options.add_argument(chrome_flag)

    def test(self):
        driver = webdriver.Chrome('./chromedriver',
                                  chrome_options=self.options)
        driver.get(self.url)
        time.sleep(2)
        driver.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
        time.sleep(2)

        self.__button(driver, "//*[@id='more']/yt-formatted-string")

        video_html = BeautifulSoup(driver.page_source,
                                   'html.parser')  # 스크롤로 인해서 업데이트된 웹을 html로 파싱
コード例 #11
0
        '''
        self.dispatchid = dispatchid
        self.typename = str(typename)
        self.stage = stage
        self.name = str(name)
        self.char1id = str(char1id)
        self.char2id = str(char2id)
        self.char3id = str(char3id)
        self.char4id = str(char4id)


#connection = mysql.connector.connect(
#host="localhost",
#user="******",
#password="******", port=3306, database="danmemo")
db = DBcontroller("localhost", "root", "danmemo", "3306", "danmemo")
dispatch_dict = dict()

with open('dispatchQuest/dispatch.txt', 'r') as f:
    line = f.readline()
    while (line):

        stage = None
        split_list = line.split(" - ")
        split_list2 = split_list[1].split(":")

        #print(split_list)
        temp = split_list[0]
        if ("(" in temp):
            typename = temp[:temp.find('(')]
            stage = temp[temp.find('('):]
コード例 #12
0
 def __init__(self):
     """Create the database handle and the analyzer used by this object."""
     # Database access object.
     self.dbc = DBcontroller()
     # Instance of the `analysis` class; its role is not visible in this
     # fragment (presumably text analysis -- confirm against its definition).
     self.analysis = analysis()
コード例 #13
0
from DBcontroller import DBcontroller
from analysis import analysis

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time

# Module-level database handle; it is constructed as soon as this module is
# imported.
dbc = DBcontroller()

class demo:
    def __init__(self, url):
        """Remember the target URL and prepare the Chrome options.

        Args:
            url: Address that ``test`` loads in the browser.
        """
        self.url = url
        self.options = webdriver.ChromeOptions()
        # Headless browser, Korean locale, fixed window size.
        for chrome_flag in ('--headless', 'lang=ko_KR',
                            '--window-size=1920,1080'):
            self.options.add_argument(chrome_flag)

    def test(self):
        """Load ``self.url`` in Chrome and keep the parsed page HTML.

        Opens the page, sends one PAGE_DOWN key press to the body, calls the
        private ``__button`` helper with the XPath of the "more" element,
        stores the parsed page source in ``self.video_html`` and calls
        ``__immutabilityvideoinformation``. The browser is always shut down,
        even when one of these steps raises.
        """
        driver = webdriver.Chrome('./chromedriver', chrome_options=self.options)
        try:
            driver.get(self.url)
            # Fixed pauses, presumably to let the page load and react to the
            # scroll -- confirm whether an explicit wait would be safer.
            time.sleep(2)
            driver.find_element_by_tag_name('body').send_keys(Keys.PAGE_DOWN)
            time.sleep(2)

            self.__button(driver, "//*[@id='more']/yt-formatted-string")

            self.video_html = BeautifulSoup(driver.page_source, 'html.parser')
            # The return value is not used in the visible part of this method.
            video_category = self.__immutabilityvideoinformation()
        finally:
            # Without this the browser process stays alive after a failure.
            driver.quit()
コード例 #14
0
import requests
from DBcontroller import DBcontroller
from bs4 import BeautifulSoup
import datetime

if __name__ == '__main__':
    dbc = DBcontroller()
    CRAWLED_DATE = datetime.datetime.today().strftime("%Y-%m-%d")

    model_name = dbc.execQuery("select MODEL_NAME from MODEL_INFO")

    for idx in range(len(model_name)):
        headers = {
            "Referer":
            "http://search.danawa.com/dsearch.php?query=" +
            str(model_name[idx][0].encode()),
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"
        }

        data = {"query": model_name[idx][0]}
        check_name_value = 0
        check_cost_value = False
        res = requests.post(
            "http://search.danawa.com/ajax/getProductList.ajax.php",
            headers=headers,
            data=data)
        soup = BeautifulSoup(res.text, "html.parser")
        products = soup.select('ul.product_list > li.prod_item ')

        for pidx in range(len(products)):
コード例 #15
0
class DBnlp_saver:
    """Builds per-model noun-frequency tables from stored comments.

    Comments are read from the reply tables referenced by ``MEDIA_INFO``,
    nouns are extracted with ``self.mecab.mecabnouns`` and each noun's count
    is kept in a ``<model code>_FREQUENCY`` table.

    NOTE(review): every SQL statement is assembled with ``str.format``; a
    noun or model name containing a quote character will break the statement.
    ``execQuery``'s parameter support is not visible here, so this is flagged
    rather than changed.
    """

    def __init__(self):
        """Create the DB handle, the analyzer and the product name tables."""
        self.dbc = DBcontroller()
        self.mecab = analysis()
        # Product names as stored in MEDIA_INFO.MODEL_NAME; index-aligned
        # with product_name_DB_version (see get_table_name).
        self.product_name = [
            "구글홈", "아이폰 XS", "갤럭시 S9", "엘지 G7", "엘지 그램 15 2018",
            "삼성 노트북 9 always", "갤럭시탭 S4", "아이패드 6세대", "아이패드 프로 3세대"
        ]
        # Short codes used as the prefix of the frequency table names.
        self.product_name_DB_version = [
            "go_s_home", "ap_p_ipxs", "ss_p_s9", "lg_p_g7", "lg_n_gram15",
            "ss_n_alwy9", "ss_t_galtap4", "ap_t_ipd6", "ap_t_pro3"
        ]

    def comment_to_result_of_nlp(self):
        """Count the nouns of every stored comment into the frequency tables.

        For each model the frequency table is created when
        ``boolean_exist_table`` reports it missing; then every comment of
        every reply table of that model is analysed and each noun registered.
        """
        for model_name in self.get_model_names():
            if not self.boolean_exist_table(model_name):
                self.make_frequency_table_by_model(model_name)
            for reply_table_name in self.get_table_name_by_model(model_name):
                replies = self.dbc.execQuery(
                    "select CONTENT, ID from {}".format(str(reply_table_name)))
                for reply_row in replies:
                    nouns = self.mecab.mecabnouns(str(reply_row[0]))
                    for noun in nouns:
                        self.register_frequency_word_in_table(
                            model_name=model_name, word=noun)

    def add_colmn_nlp_result_REPLY_REF_TABLE(self):
        """Add a ``NOUNS_FROM_COMMENT`` column to every reply table.

        Prints each ``alter table`` statement before and after running it and
        finally the number of tables processed.
        """
        col_info = "NOUNS_FROM_COMMENT varchar(1023) NOT NULL"
        altered = 0
        for model_name in self.get_model_names():
            for table_name in self.get_table_name_by_model(model_name):
                statement = "alter table {0} add {1}".format(
                    table_name, col_info)
                print(statement)
                self.dbc.execQuery(statement)
                print(statement)
                altered += 1
        print("add_colmn_complete : ", altered)

    # The methods below are the class's own helpers.

    def get_model_names(self):
        """Return the distinct ``MODEL_NAME`` values of ``MEDIA_INFO``."""
        model_name_array = self.dbc.execQuery(
            "select distinct MODEL_NAME from MEDIA_INFO")
        return [model_row[0] for model_row in model_name_array]

    def get_table_name_by_model(self, model_name=None):
        """Return reply table names, optionally restricted to one model.

        Args:
            model_name: ``MEDIA_INFO.MODEL_NAME`` to filter on; ``None``
                returns the reply tables of all models.

        Returns:
            List of ``REPLY_REF_TABLE`` values.
        """
        if model_name is None:
            reply_table_array = self.dbc.execQuery(
                "select REPLY_REF_TABLE from MEDIA_INFO")
        else:
            reply_table_array = self.dbc.execQuery(
                "select REPLY_REF_TABLE from MEDIA_INFO where MODEL_NAME='{}'".
                format(model_name))
        return [table_row[0] for table_row in reply_table_array]

    def make_frequency_table_by_model(self, model_name):
        """Create the model's frequency table unless it already exists.

        ``IF NOT EXISTS`` matters because ``boolean_exist_table`` also
        reports an existing but empty table as missing; a plain
        ``CREATE TABLE`` would then target a table that is already there.
        """
        table_name = self.get_table_name(model_name)
        schema_create_query = """
        CREATE TABLE IF NOT EXISTS {}(
          WORD varchar(63) NOT NULL,
          FREQUENCY int NOT NULL,

          PRIMARY KEY (WORD)
        )
        """
        self.dbc.execQuery(schema_create_query.format(table_name))

    def boolean_exist_table(self, model_name):
        """Return True when the model's frequency table yields any row.

        The check selects from the table, so a table that exists but holds
        no rows is reported as False as well.
        """
        table_name = self.get_table_name(model_name)
        result = self.dbc.execQuery("select * from {}".format(table_name))
        return len(result) != 0

    def get_table_name(self, model_name):
        """Return ``<model code>_FREQUENCY`` for a product name.

        An unknown ``model_name`` maps to an empty code, i.e. the returned
        name is ``"_FREQUENCY"``.
        """
        model_code = ""
        for known_name, known_code in zip(self.product_name,
                                          self.product_name_DB_version):
            if model_name == known_name:
                model_code = known_code
        return str(model_code) + "_FREQUENCY"

    def register_frequency_word_in_table(self, model_name, word):
        """Insert ``word`` with count 1 or increment its stored count."""
        table_name = self.get_table_name(model_name)
        result = self.dbc.execQuery(
            "select FREQUENCY from {} where WORD='{}'".format(
                table_name, word))
        if len(result) == 0:
            self.insert_noun_frequency_table(model_name, word)
        else:
            self.update_noun_frequency_table(model_name, word, result[0][0])

    def insert_noun_frequency_table(self, model_name, word):
        """Insert ``word`` into the model's table with a frequency of 1."""
        table_name = self.get_table_name(model_name)
        self.dbc.execQuery("insert into {0} values('{1}', {2})".format(
            table_name, word, 1))

    def update_noun_frequency_table(self, model_name, word, FREQUENCY):
        """Store ``FREQUENCY + 1`` as the new count of ``word``.

        Args:
            model_name: Product name that selects the frequency table.
            word: Noun whose row is updated.
            FREQUENCY: The count currently stored for ``word``.
        """
        table_name = self.get_table_name(model_name)
        new_frequency = FREQUENCY + 1
        self.dbc.execQuery(
            "update {0} set FREQUENCY={1} where WORD='{2}'".format(
                table_name, new_frequency, word))
コード例 #16
0
                        return_reply_table_name, content, CRAWLED_DATE, author)
                print("complete:insert case")
            elif len(result_list) == 7:
                print("update table")
                VIEWS, LIKES, HATES, NUM_OF_SUBSCIBER, VIDEO_TITLE, NUM_OF_REPLY, COMMENT = result_list
                before_comment_count = dbc.get_num_of_reply_by_url(VIDEO_URL)
                after_comment_count = NUM_OF_REPLY - before_comment_count
                Reply_ref_table_name = dbc.Update_video_in_MEDIA_INFO_return_reply_table(
                    VIDEO_URL, VIDEO_TITLE, NUM_OF_REPLY, VIEWS, CRAWLED_DATE,
                    LIKES, HATES, NUM_OF_SUBSCIBER)
                for author, content in reversed(COMMENT[:after_comment_count]):
                    dbc.Insert_reply_info_by_table_name(
                        Reply_ref_table_name, content, CRAWLED_DATE, author)
                print("complete2:update case")
        except:
            print("동영상 또는 댓글이 없습니다.")
            continue


if __name__ == '__main__':
    # Entry point: crawl every model returned by ready_date().
    dbc = DBcontroller()
    CRAWLED_DATE, MODEL = ready_date()

    for MODEL_NAME, FILT in MODEL:
        # The first field may hold several comma-separated filter words;
        # normalise it to a list either way.
        FILT = FILT[0].split(',') if ',' in FILT[0] else [FILT[0]]
        start_crawling(MODEL_NAME[0], FILT)
コード例 #17
0
 def __init__(self):
     """Capture today's date and set up the DB handle and query text."""
     # Today's local date formatted as YYYY-MM-DD.
     self.CRAWLED_DATE = datetime.datetime.today().strftime("%Y-%m-%d")
     # Database access object.
     self.dbc = DBcontroller()
     # Name of the log table.
     self.log_table_name = "LOG_INFO"
     # Sums TABLE_ROWS over every table of the 'kbrs_db' schema.
     self.WHOLE_DATA_COUNT_QUERY = "SELECT SUM(TABLE_ROWS) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='kbrs_db'"
コード例 #18
0
from DBcontroller import DBcontroller
from hanspell import spell_checker
from morpheme import analysis
import re

if __name__ == '__main__':
    dbc = DBcontroller()
    nscharacter = re.compile(
        '[^a-zA-Z0-9`~!@#$%^&*()-=_+{}\[\],./<>?;\'":|\\\]+')
    corpos = analysis()

    Querys = dbc.execQuery("select CONTENT from ap_p_ipxs_870")
    Result_list = []
    for tmp in Querys:
        Result_list.append(tmp[0])

    for result in Result_list:
        REPLACE_TEXT = result.replace('\\', '\\\\').replace('"', '\\"')
        try:
            check = spell_checker.check(REPLACE_TEXT)
            reduction = "".join(nscharacter.findall(check.checked))
            words = corpos.mecabpos(reduction)
        except:
            reduction = "".join(nscharacter.findall(REPLACE_TEXT))
            words = corpos.mecabpos(reduction)

        print(words)
        '''
            CONTENT = dbc.execQuery("select CONTENT FROM {} where CONTENT like '%보겸%'".format(result))
            for text in CONTENT:
                print(text)