def setUpDb(app, config):
    """Load ``config`` into the app, connect the DB wrapper and create its table.

    :param app: application object exposing ``config.from_object`` and
        ``app_context()``
    :param config: configuration object handed to ``app.config.from_object``
    """
    app.config.from_object(config)

    # Keyword argument of ``DB.conn`` -> name of the setting that feeds it.
    setting_for_kwarg = (
        ("table_name", "TABLE_NAME"),
        ("region_name", "REGION_NAME"),
        ("endpoint_url", "ENPOINT_URL"),
        ("aws_access_key_id", "AWS_ACCESS_KEY_ID"),
        ("aws_secret_access_key", "AWS_SECRET_ACCESS_KEY"),
    )

    with app.app_context():
        db = DB()
        settings = app.config
        # Every setting is read through ``[0]``, so each configured value is
        # expected to be an indexable sequence whose first item is used.
        conn_kwargs = {
            kwarg: settings[key][0] for kwarg, key in setting_for_kwarg
        }
        db.conn(**conn_kwargs)
        db.createTable()
def save(detail_list: List):
    """Insert or update scraped pages in the ``scraping`` table.

    Every entry is written with today's date as both ``create_date`` and
    ``update_date``. On a duplicate key only ``content`` and ``update_date``
    are overwritten, so the original ``create_date`` is kept.

    :param detail_list: entries providing a ``'link'`` value and a
        ``'content'`` iterable of strings (joined with newlines before
        being stored)
    :raises Exception: any error raised while writing is re-raised after the
        transaction has been rolled back
    """
    # Nothing to write: do not open a connection at all.
    if not detail_list:
        return

    # Build the parameter rows before touching the database so that a
    # malformed entry cannot leave an open connection behind.
    current_date = datetime.today().strftime('%Y-%m-%d')
    values = [
        [
            detail['link'],
            '\n'.join(detail['content']),
            current_date,
            current_date,
        ]
        for detail in detail_list
    ]

    query = (
        'INSERT INTO scraping (`url`, `content`, `create_date`, `update_date`) VALUES (%s, %s, %s, %s) '
        'ON DUPLICATE KEY UPDATE `content` = VALUES (`content`), `update_date` = VALUES (`update_date`)'
    )

    conn = DB.conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(query, values)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        # Runs even if creating the cursor failed.
        conn.close()
def __save(keywords: List):
    """Store keyword occurrence counts in the ``keywords`` table.

    The input is collapsed into one row per distinct keyword holding its
    number of occurrences and today's date as ``create_date``. When a row
    with the same key already exists, the new count is added to the stored
    ``num`` instead of inserting a second row.

    :param keywords: list of nouns; duplicates are counted
    :raises Exception: any error raised while writing is re-raised after the
        transaction has been rolled back
    """
    # Nothing to write: do not open a connection at all.
    if not keywords:
        return

    # Counter maps each element to its number of occurrences,
    # e.g. Counter({'noun1': 10, 'noun2': 5, 'noun3': 1}).
    counts = collections.Counter(keywords)
    current_date = datetime.today().strftime('%Y-%m-%d')
    keyword_list = [
        [word, count, current_date] for word, count in counts.items()
    ]

    # When the same `name` already exists, add to its count.
    query = (
        'INSERT INTO keywords (`name`, `num`, `create_date`) VALUES (%s, %s, %s) '
        'ON DUPLICATE KEY UPDATE `num` = `num` + VALUES(`num`)'
    )

    conn = DB.conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(query, keyword_list)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        # Runs even if creating the cursor failed.
        conn.close()
def save_words(self):
    """Analyze today's scraped content and store the resulting keywords.

    Selects ``content`` from every ``scraping`` row whose ``update_date`` is
    today, passes the string form of each value to ``self.__analyze`` and
    hands the result to ``self.__save``. Progress markers are printed at the
    start and on successful completion.

    The cursor and connection are released even when the query, the analysis
    or the save step raises; the exception itself is propagated unchanged.
    """
    print('start save_words...')

    # Only data scraped today is processed.
    query = 'SELECT content FROM scraping WHERE `update_date` = %s'
    current_date = datetime.today().strftime('%Y-%m-%d')

    conn = DB.conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(query, [current_date])
            rows = cursor.fetchall()
            for row in rows:
                keywords = self.__analyze(str(row[0]))
                self.__save(keywords)
        finally:
            cursor.close()
    finally:
        conn.close()

    print('done save_words')