def main():
    """Extract plain lyric text from ``song.song_lrc`` and store it in ``res_lrc``.

    Every ``(song_id, song_lrc)`` row of ``song`` is read and its ``song_lrc``
    column parsed as JSON. A song is skipped when ``['lrc']['lyric']`` or
    ``['tlyric']['lyric']`` is missing (``KeyError``) or when the ``tlyric``
    lyric is non-empty. For the remaining songs each lyric line has everything
    up to and including the first ``]`` and the first ``】`` removed, lines
    containing a colon are dropped, and the rest are joined with single
    spaces. Non-empty results are inserted into ``res_lrc``, committed one
    row at a time, and echoed to stdout.

    The connection is closed even when a row raises part-way through.
    """
    GET_SQL = "SELECT song_id,song_lrc FROM song"
    PUT_SQL = "INSERT INTO res_lrc(song_id,lrc) VALUES(%s,%s)"
    db = DBUtil.getConnection()
    try:
        cursor = db.cursor()
        cursor.execute(GET_SQL)
        for song_id, raw_lrc in cursor.fetchall():
            lrc_json = json.loads(raw_lrc)
            try:
                lrc = lrc_json['lrc']['lyric'].strip().split('\n')
                tlrc = lrc_json['tlyric']['lyric']
            except KeyError:
                continue
            if tlrc:
                continue
            kept = []
            for line in lrc:
                # find() returns -1 when the bracket is absent, so the slice
                # then starts at 0 and keeps the whole line.
                line = line[line.find(']') + 1:].strip()
                line = line[line.find('】') + 1:].strip()
                # The original tested the ASCII colon twice; by analogy with
                # the ']' / '】' pair above, the second test is taken to be
                # the full-width colon (U+FF1A).
                if ":" in line or "\uff1a" in line:
                    continue
                kept.append(line)
            # Empty lines are kept as empty items so the spacing matches the
            # previous `words += line + " "` accumulation exactly.
            words = " ".join(kept).strip()
            if words:
                cursor.execute(PUT_SQL, (song_id, words))
                # Commit per row so finished songs survive a later failure.
                db.commit()
                print(song_id, words)
    finally:
        db.close()
def getLrcByWord(word):
    """Return up to five random songs tagged with *word*, each with a sample lyric.

    Args:
        word: The word to look up in ``res_word``; it is also the substring
            searched for in each song's lyric tokens.

    Returns:
        A list of dicts, one per song, with keys:

        * ``'id'``     - the song id from ``res_word``.
        * ``'name'``   - ``song.song_name``, or ``None`` if no row exists.
        * ``'singer'`` - the first ``singer_name`` returned for the song, or
          ``None`` if no row exists.
        * ``'lrc'``    - the first whitespace-separated token of the stored
          lyric that contains *word*. The key is absent when no token
          matches or the song has no stored lyric.
    """
    GET_LIST_SQL = "SELECT song_id FROM res_word WHERE word=%s ORDER BY RAND() LIMIT 5"
    GET_NAME_SQL = "SELECT song_name FROM song WHERE song_id=%s"
    GET_SINGER_SQL = "SELECT singer_name FROM singer,singers_sing WHERE song_id=%s AND " \
                     "singer.singer_id=singers_sing.singer_id "
    GET_LRC_SQL = "SELECT lrc FROM res_lrc WHERE song_id=%s"
    db, cursor = DBUtil.connect()

    def fetch_first_column(sql, song_id):
        # Parameters go in a tuple, as the DB-API specifies a sequence or
        # mapping. A missing row yields None instead of raising TypeError
        # on `None[0]`.
        cursor.execute(sql, (song_id,))
        row = cursor.fetchone()
        return row[0] if row else None

    cursor.execute(GET_LIST_SQL, (word,))
    data = []
    for item in cursor.fetchall():
        song_id = item[0]
        song_data = {
            'id': song_id,
            'name': fetch_first_column(GET_NAME_SQL, song_id),
            'singer': fetch_first_column(GET_SINGER_SQL, song_id),
        }
        song_lrc = fetch_first_column(GET_LRC_SQL, song_id)
        if song_lrc:
            # Only the first token containing the word is reported.
            for lrc in song_lrc.split():
                if word in lrc:
                    song_data['lrc'] = lrc
                    break
        data.append(song_data)
    return data
def getWordsStat():
    """Return the 100 rows of ``words_rank`` with the highest ``cnt_sum``.

    Returns:
        A list of ``{'word': str, 'cnt': int}`` dicts in descending
        ``cnt_sum`` order, as delivered by the query.
    """
    query = "SELECT word, cnt_sum FROM words_rank ORDER BY cnt_sum DESC LIMIT 100"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [{'word': str(row[0]), 'cnt': int(row[1])} for row in rows]
def getAllUserBirthday():
    """Return the birthday of every user with a positive ``birthday`` value.

    The query integer-divides ``birthday`` by 1000 before it is handed to
    ``time.localtime`` (presumably a millisecond-to-second conversion;
    confirm against the ``user`` table schema).

    Returns:
        A list of ``time.struct_time`` values, one per selected row.
    """
    query = "SELECT birthday div 1000 FROM user WHERE birthday>0"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [time.localtime(row[0]) for row in rows]
def getProvinceStat():
    """Return every row of ``province_rank`` as a province/count pair.

    Returns:
        A list of ``{'province': str, 'cnt': int}`` dicts in the order the
        query returns them.
    """
    query = "SELECT province_name, cnt FROM province_rank"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [{'province': str(row[0]), 'cnt': int(row[1])} for row in rows]
def getAboutInfo():
    """Return the ``system`` table of the configured database as a dict.

    The table name is qualified with ``config.DB_NAME``. For each row the
    first column becomes the key and the second column the value; any
    further columns are ignored. A later row overwrites an earlier one with
    the same key.
    """
    query = "SELECT * FROM %s.system" % config.DB_NAME
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return {row[0]: row[1] for row in rows}
def get_attrs_to_table_name_map():
    """Return the cached attribute-name to table-name mapping, filling it if empty.

    When ``User._USER_ATTRIBUTES_TO_DB_TABLE`` is already truthy it is
    returned unchanged. Otherwise each dict returned by
    ``DBUtil.get_result_as_dicts()`` contributes one entry, keyed by its
    ``User._ATTRIBUTE_NAME_COLUMN`` value and holding its
    ``User._TABLE_NAME_COLUMN`` value.
    """
    if User._USER_ATTRIBUTES_TO_DB_TABLE:
        return User._USER_ATTRIBUTES_TO_DB_TABLE

    # NOTE(review): `query` is built but never handed to
    # DBUtil.get_result_as_dicts(); confirm whether that helper expects it.
    query = User._USER_ATTRIBUTES_TO_DB_TABLE_QUERY % (User._USER_ATTRIBUTES_TO_DB_TABLE_TABLE_NAME)
    for row in DBUtil.get_result_as_dicts():
        attribute_name = row[User._ATTRIBUTE_NAME_COLUMN]
        table_name = row[User._TABLE_NAME_COLUMN]
        User._USER_ATTRIBUTES_TO_DB_TABLE[attribute_name] = table_name
    return User._USER_ATTRIBUTES_TO_DB_TABLE
def getGenderStat():
    """Return every row of ``gender_rank`` with the gender code shown as a label.

    Codes 1, 2 and 0 are mapped to their display labels; any other code
    raises ``KeyError``.

    Returns:
        A list of ``{'gender': str, 'cnt': int}`` dicts in query order.
    """
    labels = {1: '男', 2: '女', 0: '保密'}
    query = "SELECT gender, cnt FROM gender_rank"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [{'gender': labels[row[0]], 'cnt': int(row[1])} for row in rows]
def getPublishStat():
    """Return the per-year counts from ``res_publish_stat`` for years after 1970.

    Rows whose ``year`` converts to an integer of 1970 or less are left out.

    Returns:
        A list of ``{'year': ..., 'cnt': ...}`` dicts ordered by year, with
        both values passed through exactly as the driver returned them.
    """
    query = "SELECT year,cnt FROM res_publish_stat ORDER BY year"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [
        {'year': row[0], 'cnt': row[1]}
        for row in rows
        if int(row[0]) > 1970
    ]
def getSingerRank():
    """Return the 20 rows of ``res_singer_rank`` with the highest ``comment_cnt``.

    Returns:
        A list of dicts with keys ``'singer_id'`` (int), ``'singer_name'``
        (as returned by the driver), ``'song_cnt'`` (int) and
        ``'comment_cnt'`` (int), in descending ``comment_cnt`` order.
    """
    query = "SELECT singer_id,singer_name,song_cnt,comment_cnt FROM res_singer_rank ORDER BY comment_cnt DESC LIMIT 20"
    db, cursor = DBUtil.connect()
    cursor.execute(query)
    rows = cursor.fetchall()
    return [
        {
            'singer_id': int(row[0]),
            'singer_name': row[1],
            'song_cnt': int(row[2]),
            'comment_cnt': int(row[3]),
        }
        for row in rows
    ]
import jieba
import re
from util import DBUtil, SYSUtil

# Connection and cursor are created at import time.
db = DBUtil.getConnection()
cursor = db.cursor()

# Load the stop-word list once: the file is split on whitespace into a list.
# NOTE(review): the path is relative to the working directory, and because
# stop_words is a list, the `item in stop_words` test in work() is a linear
# scan per token.
stop_file = open('static/stop-words.txt')  # '../static/' if run as __main__
stop_words = stop_file.read().strip().split()
stop_file.close()

GET_SQL = "SELECT song_id,lrc FROM res_lrc"
PUT_SQL = "INSERT INTO res_word(song_id, word, cnt) VALUES(%s, %s, %s)"


def checkChinese(word):
    """Return a match object if *word* starts with a character in the
    ranges U+4E00-U+9FA5 or U+F900-U+FA2D, otherwise ``None``.

    ``re.match`` anchors at the start of the string, so only the first
    character is tested.
    """
    return re.match('[\u4E00-\u9FA5\uF900-\uFA2D]', word)


def work(lrc):
    """Segment *lrc* with ``jieba.cut`` and return the tokens worth keeping.

    Each token is stripped of surrounding whitespace. It is dropped when it
    is a stop word, is empty after stripping, or fails ``checkChinese``.
    Kept tokens are returned as a list in their original order, duplicates
    included.
    """
    cut_res = jieba.cut(lrc)
    res_list = []
    for item in cut_res:
        item = item.strip()
        if item in stop_words or not item or not checkChinese(item):
            continue
        res_list.append(item)
    return res_list


def count(words):
import random
from util import DBUtil
from functools import cmp_to_key

# Load the whole res_word_value table at import time into `values`, a dict
# mapping each word to its value converted to int.
db, cursor = DBUtil.connect()
GET_WORD_VALUE_SQL = "SELECT word,value from res_word_value"
cursor.execute(GET_WORD_VALUE_SQL)
res = cursor.fetchall()
values = {}
for item in res:
    values[item[0]] = int(item[1])


def cmpSortByValue(x, y):
    """Old-style comparator that orders words by descending value.

    Words missing from ``values`` are treated as having value 0.

    Returns:
        0 when both words have the same value, -1 when *x* has the larger
        value (so it sorts first), and 1 otherwise.
    """
    vx = values.get(x, 0)
    vy = values.get(y, 0)
    if vx == vy:
        return 0
    if vx > vy:
        return -1
    return 1


def calc(words):
    words.sort(key=cmp_to_key(cmpSortByValue))
    cnt = 0
    value_sum = 0
    used_words = []
def _get_conn(self):
    """Return the connection cached on ``User._CONN``, creating it on first use.

    When ``User._CONN`` is falsy, the configuration stored under
    ``ConfigReader.CONNECTION_NAME`` is read from a fresh ``ConfigReader``
    and passed to ``DBUtil.get_connection``. The result is stored on the
    class, so it is shared by every caller.
    """
    if User._CONN:
        return User._CONN

    connection_config = ConfigReader().get_configuration(ConfigReader.CONNECTION_NAME)
    User._CONN = DBUtil.get_connection(connection_config)
    return User._CONN