Ejemplo n.º 1
0
    def refine(self):
        """Refine crawled rows for this convention into event records.

        Keeps only rows whose title (row[2]) matches the animal or plant
        keyword patterns, scrapes event fields out of the page HTML in
        row[5] with label-specific regexes, resolves the event homepage
        <title>, and inserts one record per matching row via
        cc.evnt_insert(). Commits and closes the connection at the end.
        """
        cc = cm.CrawlClass()
        crawl_version = self.now.strftime('%Y%m%d')
        rows = cc.original_select(self.convention_name, crawl_version)
        convention_info = cc.convention_select(self.convention_name)
        for row in rows:
            data = {}
            # Classify the title as animal- or plant-related; rows that
            # match neither pattern are skipped below (match is False).
            animal = re.search(cc.pattern_title_animal(), row[2])
            plant = re.search(cc.pattern_title_plant(), row[2])
            if animal is not None:
                pet_cat_cd = 'animal'
                match = animal
            elif plant is not None:
                pet_cat_cd = 'plant'
                match = plant
            else:
                pet_cat_cd = ''
                match = False

            # Field-extraction regexes over the page HTML (Korean labels:
            # host, organizer, venue, period, viewing hours, admission fee,
            # phone number, homepage).
            pattern_host = r'\"주최\".*\<\/dt\>\n\<dd\>(.*)\<\/dd\>'
            pattern_supvsn = r'\"주관사\".*\<\/dt\>\n\<dd\>(.*)\n*\t*\<\/dd\>'
            pattern_addt_dtl = r'장소.*\s*\<dd\>\s*(.*\s*.*)\s*'
            pattern_date = r'\"기간\"\s.*\<\/dt\>\n\<dd\>\s*\t*(.*)\s*\t*\<\/dd\>'
            pattern_time = r'\"관람시간\"\s?.*\<\/dt\>\n\<dd\>\s*\t*(.*)\s*\t*\<\/dd\>'
            pattern_cost = r'입장료\".*\s*\<dd\>\s*(.*)\s*\<\/dd\>'
            pattern_phone = r'\"전화번호\".*\s*\<dd\>\<span.*\s*([0-9]*-[0-9]*-[0-9]*).*\s*\<\/span\>'
            pattern_home = r'\"홈페이지\".*\s*\<dd\>.*\s*\<a href\=\".*\"\>(.*)\<\/a\>'
            reg_date = self.now.strftime('%Y-%m-%d %H:%M:%S')
            source_url = row[7]

            if match:
                host = re.findall(pattern_host, row[5])
                str_host = host[0] if host else ''
                supvsn = re.findall(pattern_supvsn, row[5])
                str_supvsn = supvsn[0].strip() if supvsn else ''
                addt_dtl = re.findall(pattern_addt_dtl, row[5])
                str_addt_dtl = (addt_dtl[0].strip().replace('\n', '')
                                if addt_dtl else '')
                # Period looks like 'YYYY.MM.DD ~ YYYY.MM.DD'; split on '~'
                # and normalize dots to dashes for strptime.
                date = re.findall(pattern_date, row[5])
                tempdate = str(date).replace("['", "").replace("']",
                                                               "").strip()
                date_index = tempdate.find('~')
                date_start = datetime.datetime.strptime(
                    tempdate[0:date_index].replace('.', '-').strip(),
                    '%Y-%m-%d')
                date_end = datetime.datetime.strptime(
                    tempdate[date_index + 1:].replace('.', '-').strip(),
                    '%Y-%m-%d')
                time = re.findall(pattern_time, row[5])
                str_time = time[0] if time else ''
                cost = re.findall(pattern_cost, row[5])
                str_cost = cost[0] if cost else ''
                phone = re.findall(pattern_phone, row[5])
                str_phone = phone[0].strip() if phone else ''

                home = re.findall(pattern_home, row[5])
                # BUG FIX: the original indexed home[0] before checking that
                # any match existed, raising IndexError on pages with no
                # homepage link.
                if home and len(home[0]) > 0:
                    str_home_url = home[0]
                    if re.search(r'http://', str_home_url):
                        res = urllib.request.urlopen(str_home_url).read()
                    else:
                        # BUG FIX: the original conditional expression bound
                        # only the scheme when one was present
                        # (`a if cond else 'http' + '://' + ...`), so
                        # temp_url was just e.g. 'https' and urlopen failed.
                        # Build the full URL in both cases, percent-quoting
                        # the (possibly non-ASCII) path.
                        encoding_url = parse.urlparse(str_home_url)
                        scheme = (encoding_url.scheme
                                  if encoding_url.scheme else 'http')
                        temp_url = (scheme + '://' + encoding_url.netloc +
                                    quote(encoding_url.path))
                        res = urllib.request.urlopen(temp_url).read()
                    soup = Bs(res, 'html.parser')
                    home_title = soup.find_all('title', limit=1)
                    str_home_title = home_title[0].text
                else:
                    str_home_url = ''
                    str_home_title = ''

                data['TP_CD'] = 'fest'
                data['PET_CAT_CD'] = pet_cat_cd
                data['TTL'] = match.string
                data['HOST_NM'] = str_host
                data['SUPVSN'] = str_supvsn
                data['ADDR'] = convention_info[0][4]
                data['ADDR_DTL'] = str_addt_dtl
                data['LOC'] = self.convention_name
                data['ZIPNO'] = convention_info[0][6]
                data['LAT'] = convention_info[0][7]
                data['LNG'] = convention_info[0][8]
                data['FR_DATE'] = date_start
                data['TO_DATE'] = date_end
                data['EVNT_TIME'] = str_time
                data['ONLN_YN'] = 'N'
                data['OFFLN_YN'] = 'Y'
                data['ENTR_COST'] = str_cost
                data['HPG_NM'] = str_home_title
                data['HPG_URL'] = str_home_url
                data['QNA'] = str_phone
                data['CTN'] = row[6] if row[6] is not None else ''
                data['M_IMG_ID'] = row[8] if row[8] is not None else ''
                data['LIST_IMG_ID'] = ''
                data['COMP_NM'] = str_host
                data['DAY_CD'] = cc.get_day_cd(date_start, date_end)
                data['RGN_CD'] = cc.get_rgn_cd(convention_info[0][4][0:2])
                data['DEL_YN'] = 'N'
                data['REG_ID'] = 'crawler'
                data['REG_DTTM'] = reg_date
                data['UPD_ID'] = 'crawler'
                data['UPD_DTTM'] = reg_date
                data['CRAWL_VERSION'] = crawl_version
                data['SOURCE_URL'] = source_url
                data['CONVENTION_NAME'] = self.convention_name
                data['EVENT_TYPE'] = row[3] if row[3] is not None else ''
                cc.evnt_insert(data)
        cc.commit()
        cc.close()
Ejemplo n.º 2
0
    def refine(self):
        """Scrape event details from crawled pages and persist them.

        Selects this convention's raw crawl rows, keeps titles matching
        the animal/plant keyword patterns, pulls each event field out of
        the page HTML (row[5]) with a label-specific regex, fetches the
        homepage <title>, and inserts one event record per matching row.
        """
        cc = cm.CrawlClass()
        crawl_version = self.now.strftime('%Y%m%d')
        rows = cc.original_select(self.convention_name, crawl_version)
        convention_info = cc.convention_select(self.convention_name)
        reg_date = self.now.strftime('%Y-%m-%d %H:%M:%S')

        def _capture(pattern, text):
            # First regex capture, or '' when the label is absent.
            hits = re.findall(pattern, text)
            return hits[0] if hits else ''

        for row in rows:
            title = row[2]
            page = row[5]
            # Classify the title; rows matching neither pattern are skipped.
            animal_hit = re.search(cc.pattern_title_animal(), title)
            plant_hit = re.search(cc.pattern_title_plant(), title)
            if animal_hit is not None:
                pet_cat_cd, match = 'animal', animal_hit
            elif plant_hit is not None:
                pet_cat_cd, match = 'plant', plant_hit
            else:
                pet_cat_cd, match = '', False
            if not match:
                continue

            # Extraction regexes (Korean <dt> labels: host/organizer, venue,
            # period, hours, admission fee, phone, homepage).
            host_pat = r'\<dt\>주최\/주관\<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            place_pat = r'\<dt\>장소\<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            period_pat = r'\<dt\>기간<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            time_pat = r'\<dt\>시간\<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            fee_pat = r'\<dt\>관람료\<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            phone_pat = r'\<dt\>전화\<\/dt\>\n\<dd\>\n\t{4}(.*?)\n\t{4}\<\/dd\>'
            home_pat = r'\<dt\>홈페이지\<\/dt\>\n\<dd\>\n\t*\<a.*\"\>(.*?)\<\/a\>\n\t*\<\/dd\>'

            str_host = _capture(host_pat, page)
            str_addt_dtl = _capture(place_pat, page)
            str_time = _capture(time_pat, page)
            str_cost = _capture(fee_pat, page)
            str_phone = _capture(phone_pat, page)

            # Period looks like 'YYYY.MM.DD ~ YYYY.MM.DD'; split on '~'.
            raw_period = str(re.findall(period_pat, page)).replace(
                "['", "").replace("']", "").strip()
            tilde = raw_period.find('~')
            date_start = datetime.datetime.strptime(
                raw_period[:tilde].replace('.', '-').strip(), '%Y-%m-%d')
            date_end = datetime.datetime.strptime(
                raw_period[tilde + 1:].replace('.', '-').strip(), '%Y-%m-%d')

            # Homepage: fetch the linked page and take its <title> text.
            home_hits = re.findall(home_pat, page)
            if home_hits:
                str_home_url = home_hits[0]
                markup = urllib.request.urlopen(str_home_url).read()
                soup = Bs(markup, 'html.parser')
                str_home_title = soup.find_all('title', limit=1)[0].text
            else:
                str_home_url = ''
                str_home_title = ''

            data = {
                'TP_CD': 'fest',
                'PET_CAT_CD': pet_cat_cd,
                'TTL': match.string,
                'HOST_NM': str_host,
                'SUPVSN': str_host,  # this site has no separate organizer
                'ADDR': convention_info[0][4],
                'ADDR_DTL': str_addt_dtl,
                'LOC': self.convention_name,
                'ZIPNO': convention_info[0][6],
                'LAT': convention_info[0][7],
                'LNG': convention_info[0][8],
                'FR_DATE': date_start,
                'TO_DATE': date_end,
                'EVNT_TIME': str_time,
                'ONLN_YN': 'N',
                'OFFLN_YN': 'Y',
                'ENTR_COST': str_cost,
                'HPG_NM': str_home_title,
                'HPG_URL': str_home_url,
                'QNA': str_phone,
                'CTN': row[6] if row[6] is not None else '',
                'M_IMG_ID': row[8] if row[8] is not None else '',
                'LIST_IMG_ID': '',
                'COMP_NM': str_host,
                'DAY_CD': cc.get_day_cd(date_start, date_end),
                'RGN_CD': cc.get_rgn_cd(convention_info[0][4][0:2]),
                'DEL_YN': 'N',
                'REG_ID': 'crawler',
                'REG_DTTM': reg_date,
                'UPD_ID': 'crawler',
                'UPD_DTTM': reg_date,
                'CRAWL_VERSION': crawl_version,
                'SOURCE_URL': row[7],
                'CONVENTION_NAME': self.convention_name,
                'EVENT_TYPE': row[3] if row[3] is not None else '',
            }
            cc.evnt_insert(data)
        cc.commit()
        cc.close()
Ejemplo n.º 3
0
    def insert(self):
        """Scrape event rows for this convention and insert them.

        Keeps rows whose title (row[2]) matches the animal/plant keyword
        patterns, extracts event fields from the page HTML in row[5],
        resolves the event homepage <title> (percent-encoding Korean
        characters in the URL when present), and inserts one record per
        kept row via cc.evnt_insert(). Titles starting with '[' are
        skipped. Commits and closes the connection at the end.
        """
        cc = cm.CrawlClass()
        crawl_version = self.now.strftime('%Y%m%d')
        rows = cc.original_select(self.convention_name, crawl_version)
        # convention_info = cc.convention_select(self.convention_name)
        for row in rows:
            data = {}
            # Classify the title as animal- or plant-related; rows matching
            # neither pattern are skipped below (match is False).
            animal = re.search(cc.pattern_title_animal(), row[2])
            plant = re.search(cc.pattern_title_plant(), row[2])
            if animal is not None:
                pet_cat_cd = 'animal'
                match = animal
            elif plant is not None:
                pet_cat_cd = 'plant'
                match = plant
            else:
                pet_cat_cd = ''
                match = False
            # Extraction regexes over table markup (Korean <th> labels:
            # location, venue, admission fee, event period, homepage, host,
            # contact, organizer). pattern_time uses <dt>/<dd> markup unlike
            # the rest — NOTE(review): possibly carried over from another
            # site's refine(); confirm it ever matches on this site.
            pattern_addr = r'위치\<\/th\>\s*\<td\>(.*)\<\/td\>'
            pattern_addr_dtl = r'행사장소\<\/th\>\s*\<td\>(.*)\<\/td\>'
            pattern_cost = r'입장료\<\/th\>\s*\<td\>(.*)\s*\<\/td\>'
            pattern_date = r'행사기간\<\/th\>\s*\<td\>(.*)\<\/td\>'
            pattern_home = r'홈페이지\<\/th\>\s*\<td\>.*\_blank\"\>(.*)\<\/a\>\<\/td\>'
            pattern_host = r'주최\<\/th\>\s*\<td\>(.*)\<\/td\>'
            pattern_phone = r'연락처\<\/th\>\s*\<td\>.*\"\>(.*)\<\/a\>\<\/td\>'
            pattern_supvsn = r'주관\<\/th\>\s*\<td\>(.*)\<\/td\>'
            pattern_time = r'\<dt\>시간\<\/dt\>\n\<dd\>(.*?)\<\/dd\>'
            # pattern_ctn = r''
            reg_date = self.now.strftime('%Y-%m-%d %H:%M:%S')
            source_url = row[7]
            insert_flag = True
            if match:
                host = re.findall(pattern_host, row[5])
                if len(host) > 0:
                    str_host = host[0]
                else:
                    str_host = ''
                supvsn = re.findall(pattern_supvsn, row[5])
                if len(supvsn) > 0:
                    str_supvsn = supvsn[0]
                else:
                    str_supvsn = ''
                addr = re.findall(pattern_addr, row[5])
                if len(addr) > 0:
                    str_addr = addr[0]
                else:
                    str_addr = ''

                addt_dtl = re.findall(pattern_addr_dtl, row[5])
                if len(addt_dtl) > 0:
                    str_addt_dtl = addt_dtl[0]
                else:
                    str_addt_dtl = ''
                # Period looks like 'YYYY.MM.DD ~ YYYY.MM.DD'; split on '~'
                # and normalize dots to dashes for strptime.
                date = re.findall(pattern_date, row[5])
                tempdate = str(date).replace("['", "").replace("']",
                                                               "").strip()
                date_index = tempdate.find('~')
                date_start = datetime.datetime.strptime(
                    tempdate[0:date_index].replace('.', '-').strip(),
                    '%Y-%m-%d')
                date_end = datetime.datetime.strptime(
                    tempdate[date_index + 1:len(tempdate)].replace(
                        '.', '-').strip(), '%Y-%m-%d')
                temp_time = re.findall(pattern_time, row[5])
                if len(temp_time) > 0:
                    str_time = temp_time[0]
                else:
                    str_time = ''
                cost = re.findall(pattern_cost, row[5])
                if len(cost) > 0:
                    str_cost = cost[0]
                else:
                    str_cost = ''
                phone = re.findall(pattern_phone, row[5])
                if len(phone) > 0:
                    str_phone = phone[0]
                else:
                    str_phone = ''

                home = re.findall(pattern_home, row[5])
                if len(home) > 0 and home[0] != '':
                    # Hangul syllable range; used to detect Korean characters
                    # embedded in the homepage URL.
                    pattern_hangul = r'[가-힣]'
                    hangul = re.findall(pattern_hangul, home[0])
                    if len(hangul) > 0:
                        # The URL contains Korean characters: split it into
                        # the ASCII prefix before the first hangul char and
                        # the suffix after the last one, percent-encode the
                        # hangul run via self.encode_url, and recombine.
                        # NOTE(review): slicing assumes the hangul characters
                        # form one contiguous run — confirm against real URLs.
                        str_home_url = home[0]
                        pre_url = home[0][0:home[0].index(hangul[0])]
                        sub_url = home[0][home[0].index(hangul[len(hangul) -
                                                               1]) +
                                          1:len(home[0])]
                        encode_url1 = self.encode_url(hangul)
                        # str(bytes)[2:-1] strips the b'...' wrapper from the
                        # encoded value before concatenation.
                        str_home_url = str(
                            encode_url1)[2:len(str(encode_url1)) - 1] + sub_url
                        res = urllib.request.urlopen(pre_url + str_home_url)

                        soup = Bs(res, 'html.parser')
                        home_title = soup.select('head > title')
                        str_home_title = home_title[0].text if len(
                            home_title) > 0 else ''
                    else:
                        # Pure-ASCII URL: fetch it directly.
                        str_home_url = home[0]
                        res = urllib.request.urlopen(str_home_url).read()

                        soup = Bs(res, 'html.parser')
                        home_title = soup.select('head > title')
                        str_home_title = home_title[0].text if len(
                            home_title) > 0 else ''
                else:
                    str_home_url = ''
                    str_home_title = ''

                # Titles beginning with '[' are excluded from insertion;
                # all others get the default region code '9999'.
                if match.string[0:1] == '[':
                    insert_flag = False
                else:
                    str_rgn_cd = '9999'

                if insert_flag:
                    print(match.string)
                    data['TP_CD'] = 'fest'
                    data['PET_CAT_CD'] = pet_cat_cd
                    data['TTL'] = match.string
                    data['HOST_NM'] = str_host
                    data['SUPVSN'] = str_supvsn
                    data['ADDR'] = str_addr
                    data['ADDR_DTL'] = str_addt_dtl
                    data['LOC'] = str_addt_dtl
                    data['ZIPNO'] = ''
                    data['LAT'] = 0
                    data['LNG'] = 0
                    data['FR_DATE'] = date_start
                    data['TO_DATE'] = date_end
                    data['EVNT_TIME'] = str_time
                    data['ONLN_YN'] = 'N'
                    data['OFFLN_YN'] = 'Y'
                    data['ENTR_COST'] = str_cost
                    data['HPG_NM'] = str_home_title
                    data['HPG_URL'] = str_home_url
                    data['QNA'] = str_phone
                    data['CTN'] = row[6] if row[6] is not None else ''
                    data['M_IMG_ID'] = row[8] if row[8] is not None else ''
                    data['LIST_IMG_ID'] = ''
                    data['COMP_NM'] = str_host
                    data['DAY_CD'] = cc.get_day_cd(date_start, date_end)
                    data['RGN_CD'] = str_rgn_cd
                    data['DEL_YN'] = 'N'
                    data['REG_ID'] = 'crawler'
                    data['REG_DTTM'] = reg_date
                    data['UPD_ID'] = 'crawler'
                    data['UPD_DTTM'] = reg_date
                    data['CRAWL_VERSION'] = crawl_version
                    data['SOURCE_URL'] = source_url
                    data['CONVENTION_NAME'] = self.convention_name
                    data['EVENT_TYPE'] = row[3] if row[3] is not None else ''
                    cc.evnt_insert(data)
        cc.commit()
        cc.close()
Ejemplo n.º 4
0
    def test_insert1(self):
        """Scrape pet-related events from crawled pages and insert them.

        Keeps rows whose title (row[2]) contains a pet keyword, parses
        event fields (dates, time range, venue, host, organizer, fee,
        phone, homepage) out of the page HTML in row[5], prints them for
        inspection, and inserts one record per row via
        cc.content_insert(). Commits and closes the connection at the end.
        """
        cc = cm.CrawlClass()
        rows = cc.content_select(self.convention_name)
        # Removed unused locals from the original (cnt counter, z_start /
        # z_end placeholders, unused title_songdoconvensia pattern) and
        # dead commented-out code — no behavior change.
        for row in rows:
            data = {}
            # Pet keywords: cat / dog / pet / animal / companion dog /
            # companion animal.
            title_pattern = r"(캣|도그|펫|동물|애견|애완)"
            match2 = re.search(title_pattern, row[2])
            # Extraction regexes over table markup (Korean <th> labels:
            # host, organizer, schedule, venue, admission fee, contact,
            # event homepage).
            pattern_host = r'주최\<\/th\>\n\<td\>[\n\t ]*(.*)[\n\t ]*\<\/td\>'
            pattern_manage = r'주관\<\/th\>\n\<td\>[\n\t ]*(.*)[\n\t ]*\<\/td\>'
            pattern_date = r'일정\<\/th\>\n\<td colspan\=\"3\"\>\<strong\>(.*)\n'
            pattern_time = r'([0-9]{2}:[0-9]{2}.?[0-9]{2}:[0-9]{2})'
            pattern_place = r'위치\<\/th\>\n.*\<strong\>(.*)\<\/strong\>'
            pattern_money = r'입장료\<\/th\>\n\<td\>(.*)\<\/td\>'
            pattern_phone = r'문의처\<\/th\>\n\<td\>(.*)\<\/td\>'
            pattern_url = row[6]  # source page URL for this row
            pattern_home = r'행사홈페이지\<\/th\>\n.*\<a.*\>(.*)\<\/a\>'
            now = datetime.datetime.now()
            reg_date = now.strftime('%Y-%m-%d %H:%M:%S')
            if match2:
                place = re.findall(pattern_place, row[5])
                str_place = place[0] if place else ''
                # Schedule looks like 'YYYY년 MM월 DD일 ~ YYYY년 MM월 DD일';
                # normalize the Korean date units to '-' around the '~'.
                date = re.findall(pattern_date, row[5])
                tempdate = str(date).replace("['", "").replace("']",
                                                               "").strip()
                date_index = tempdate.find('~')
                date_start = tempdate[0:date_index].replace('년 ', '-').replace(
                    '월 ', '-').replace('일', '')
                date_end = tempdate[date_index + 1:].replace(
                    '년 ', '-').replace('월 ', '-').replace('일', '')
                # Time range like 'HH:MM~HH:MM'; the '.?' in pattern_time
                # swallows the separator, split again on '~'.
                time = re.findall(pattern_time, row[5])
                if time:
                    temptime = str(time[0]).replace("['", "").replace(
                        "']", "").strip()
                    time_index = temptime.find('~')
                    time_start = temptime[0:time_index]
                    time_end = temptime[time_index + 1:]
                else:
                    time_start = ''
                    time_end = ''
                phone = re.findall(pattern_phone, row[5])
                str_phone = phone[0].strip() if phone else ''
                home = re.findall(pattern_home, row[5])
                str_home = home[0].strip() if home else ''
                manage = re.findall(pattern_manage, row[5])
                str_manage = manage[0].strip() if manage else ''
                host = re.findall(pattern_host, row[5])
                str_host = host[0] if host else ''
                money = re.findall(pattern_money, row[5])
                str_money = money[0] if money else ''
                # Debug output for manual verification of the scrape.
                print("주최 {}".format(str_host))
                print("주관 {}".format(str_manage))
                print(date)
                d_start = datetime.datetime.strptime(date_start.strip(),
                                                     '%Y-%m-%d')
                print(d_start)
                d_end = datetime.datetime.strptime(date_end.strip(),
                                                   '%Y-%m-%d')
                print(d_end)
                print("장소 {}".format(str_place))
                print("돈 {}".format(str_money))
                print("폰번호 {}".format(str_phone))
                print("홈페이지 {}".format(str_home))

                data['convention_name'] = self.convention_name
                data['event_name'] = match2.string
                data['event_type'] = row[3]
                data['place'] = str_place
                data['date_start'] = d_start
                # NOTE(review): key is 'data_end' (not 'date_end') — kept
                # as-is for compatibility with cc.content_insert's schema;
                # confirm before renaming.
                data['data_end'] = d_end
                data['time_start'] = time_start
                data['time_end'] = time_end
                data['phone'] = str_phone
                data['home_page'] = str_home
                data['manage'] = str_manage
                data['host'] = str_host
                data['money'] = str_money
                data['source_url'] = pattern_url
                data['reg_date'] = reg_date
                cc.content_insert(data)
        cc.commit()
        cc.close()
Ejemplo n.º 5
0
    def test_insert1(self):
        """Scrape pet-related events from the ICC Jeju schedule pages.

        Keeps rows whose title (row[2]) contains a pet keyword, parses
        host, period, venue, phone and website out of list-item markup in
        row[5], prints them for inspection, and inserts one record per
        row via cc.content_insert(). Commits and closes at the end.
        """
        cc = cm.CrawlClass()
        rows = cc.content_select(self.convention_name)
        # Removed the unused cnt counter and dead commented-out code from
        # the original — no behavior change.
        for row in rows:
            # Pet keywords: cat / dog / pet / animal / companion dog /
            # companion animal.
            title_pattern = r"(캣|도그|펫|동물|애견|애완)"
            match2 = re.search(title_pattern, row[2])
            # Extraction regexes over <li> markup (Korean labels: host,
            # period, venue) plus phone-number and website patterns.
            pattern_host = r'\<li\>주\s*최\s?\:\s?(.*)\<\/li\>'
            pattern_date = r'\<li\>기\s*간\s?\:\s?(.*)\<\/li\>'
            pattern_place = r'\<li\>장\s*소\s?\:\s?(.*)\<\/li\>'
            pattern_phone = r'[0-9]{2,3}\-[0-9]{4}\-[0-9]{4}'
            pattern_home = r'웹사이트.*\>\s*(.*)\s*\<\/a\>'
            now = datetime.datetime.now()
            reg_date = now.strftime('%Y-%m-%d %H:%M:%S')
            if match2:
                data = {}
                place = re.findall(pattern_place, row[5])
                str_place = place[0] if place else ''
                # Period looks like 'YYYY.MM.DD ~ YYYY.MM.DD'; split on '~'
                # and normalize dots to dashes for strptime.
                date = re.findall(pattern_date, row[5])
                tempdate = str(date).replace("['", "").replace("']",
                                                               "").strip()
                date_index = tempdate.find('~')
                date_start = tempdate[0:date_index].replace('.', '-')
                date_end = tempdate[date_index + 1:].replace('.', '-')
                phone = re.findall(pattern_phone, row[5])
                str_phone = phone[0].strip() if phone else ''
                home = re.findall(pattern_home, row[5])
                str_home = home[0].strip() if home else ''
                host = re.findall(pattern_host, row[5])
                str_host = host[0] if host else ''
                # BUG FIX: the original printed host[0] directly, raising
                # IndexError whenever the host pattern found no match even
                # though str_host already covered that case.
                print("주최 {}".format(str_host))
                print(date)
                d_start = datetime.datetime.strptime(date_start.strip(),
                                                     '%Y-%m-%d')
                print(d_start)
                d_end = datetime.datetime.strptime(date_end.strip(),
                                                   '%Y-%m-%d')
                print(d_end)
                print("장소 {}".format(str_place))
                print("폰번호 {}".format(str_phone))
                print("홈페이지 {}".format(str_home))

                data['convention_name'] = self.convention_name
                data['event_name'] = match2.string.strip()
                data['event_type'] = row[3]
                data['place'] = str_place
                data['date_start'] = d_start
                # NOTE(review): key is 'data_end' (not 'date_end') — kept
                # as-is for compatibility with cc.content_insert's schema;
                # confirm before renaming.
                data['data_end'] = d_end
                data['time_start'] = ''
                data['time_end'] = ''
                data['phone'] = str_phone
                data['home_page'] = str_home
                data['manage'] = ''
                data['host'] = str_host
                data['money'] = ''
                data['source_url'] = 'http://www.iccjeju.co.kr/Event/Schedule'
                data['reg_date'] = reg_date
                cc.content_insert(data)
        cc.commit()
        cc.close()