def _get_or_create_singer(singer_info):
    """Return the stored Singer for ``singer_info``, creating and saving it if absent.

    :param singer_info: dict read with the keys ``id``, ``mid`` and ``name``.
    :return: the existing Singer matched on ``qq_id``, or a newly saved one.
    """
    singer_qq_id = singer_info.get('id', '')
    singer_name = singer_info.get('name', '')
    query_result = Singer.objects(**{'qq_id': singer_qq_id})
    if len(query_result):
        return query_result[0]

    logger.info('{} 歌手信息缺失'.format(singer_name))
    singer_obj = Singer()
    singer_obj.qq_id = singer_qq_id
    singer_obj.mid = singer_info.get('mid', '')
    singer_obj.name = singer_name
    # Persist the new singer before an album/song points at it; otherwise the
    # next lookup by qq_id misses again and the stored references dangle.
    singer_obj.save()
    return singer_obj


def _get_or_create_album(albums_info, album_singers):
    """Return the stored Album for ``albums_info``, creating and saving it if absent.

    :param albums_info: dict read with the keys ``id``, ``mid`` and ``name``.
    :param album_singers: Singer objects assigned to a newly created album.
    :return: the existing Album matched on ``mid``, or a newly saved one.
    """
    album_mid = albums_info.get('mid', '')
    album_name = albums_info.get('name', '')
    album_query_result = Album.objects(**{'mid': album_mid})
    if len(album_query_result):
        return album_query_result[0]

    logger.info('{} 专辑信息缺失'.format(album_name))
    album_obj = Album()
    album_obj.name = album_name
    album_obj.mid = album_mid
    album_obj.qq_id = albums_info.get('id', '')
    album_obj.singer = album_singers
    album_obj.desc = ''
    album_obj.save()
    return album_obj


def getOneSongInfoCallback(result):
    """Store the first song described by ``result`` unless it already exists.

    The first entry of ``result['data']`` is taken as the song. When no Song
    with the same ``strMediaMid`` is stored, the related singers and album are
    looked up (and created when missing) and a new Song is saved. Nothing
    happens when ``result['data']`` is empty or absent.

    :param result: dict-like response; ``data`` is expected to be a list of
        song dicts read with the keys ``album``, ``name``, ``url``, ``mid``
        and ``id``.
    :return: None
    """
    data = result.get('data', [])
    if not data:
        return
    data = data[0]

    song_name = data.get('name', '')
    song_mid = data.get('mid', '')
    if len(Song.objects(**{'strMediaMid': song_mid})):
        logger.info('{} 歌曲库已经存在'.format(song_name))
        return

    logger.info('{} 歌曲信息缺失'.format(song_name))
    # NOTE(review): the top-level 'singer' key keeps priority (original
    # behaviour). The per-song fallback assumes the singer list sits next to
    # 'album' inside the song entry -- confirm against the real payload.
    singers_info = result.get('singer') or data.get('singer') or []
    album_singers = [_get_or_create_singer(info) for info in singers_info]
    album_obj = _get_or_create_album(data.get('album', {}), album_singers)

    song_obj = Song()
    song_obj.strMediaMid = song_mid
    song_obj.qq_id = data.get('id', '')
    song_obj.play_url = data.get('url', '')
    song_obj.singers = album_singers
    song_obj.album = album_obj
    song_obj.save()
    logger.info('{} 歌曲增加成功'.format(song_name))
def run(project: str = "ace-digit-277918", dataset: str = "world_bank_wdi") -> None:
    """
    Wrapper function that runs the extraction for one dataset and logs how long it took.

    :type project: str
    :param project: project passed to BigQueryDataExtractor; defaults to the
        previously hard-coded value
    :type dataset: str
    :param dataset: dataset passed to wrapper_query_to_df; defaults to the
        previously hard-coded value
    :rtype: None
    """
    start = timeit.default_timer()
    bq_runner = BigQueryDataExtractor(project=project)
    bq_runner.wrapper_query_to_df(dataset=dataset)
    # default_timer() reports seconds; convert so the log states its unit.
    elapsed_minutes = (timeit.default_timer() - start) / 60
    logger.info(f"Program took {elapsed_minutes:.2f} minutes to execute")
def get_table_list(self, dataset: bigquery.DatasetReference) -> list:
    """
    Function that takes a dataset and returns a list of the tables within that dataset.

    :type dataset: bigquery.DatasetReference
    :param dataset: the dataset reference object that you want to return a list of tables
    :rtype: list
    """
    # Materialise the listing before logging: presumably public_client is a
    # BigQuery client whose list_tables returns a lazy iterator (confirm), so
    # logging first would report success before any table had been fetched.
    tables_list = list(self.public_client.list_tables(dataset=dataset))
    logger.info(f"Retrieved tables in {dataset}")
    return tables_list
def extract_lyic(self, html, mid):
    """Parse a lyric page and store its lyric details on the matching song.

    The song is looked up by ``strMediaMid == mid``. If it exists and has no
    lyric yet, the lyric text, lyric writer and composer taken from ``html``
    are assigned to it and the song is saved. Otherwise only a log line is
    written.

    :param html: markup of the lyric page; must contain ``#lrc_content``.
    :param mid: value matched against ``Song.strMediaMid``.
    :raises IndexError: when ``html`` has no ``#lrc_content`` element.
    :return: None
    """
    def _text_without_label(text, label):
        # str.lstrip(label) strips a *set of characters*, so it could also eat
        # leading characters of the name itself; remove the exact prefix only.
        # A missing node (None) is treated as an empty string.
        text = (text or '').strip('\n')
        if text.startswith(label):
            text = text[len(label):]
        return text

    song_query_result = Song.objects(**{'strMediaMid': mid})
    selector = Selector(text=html)
    lyric = selector.css('#lrc_content').extract()[0]
    # One strip over the whole character set also trims mixed runs such as
    # "\r\n", which chained single-character strips leave in place.
    lyric = remove_tags(lyric).strip('\n\r ')
    # Only used in log messages, so a missing node must not abort the call.
    song_info = (selector.css(
        '#lrc_content > p:nth-child(6)::text').extract_first() or '').strip('\n')

    if not len(song_query_result):
        logger.info('写入歌词时,未查询到 {} 歌曲 信息'.format(song_info))
        return

    song_obj = song_query_result[0]
    if song_obj.lyric:
        logger.info(u'%s 歌词 已经写入' % song_info)
        return

    lric_author = selector.css(
        '#lrc_content > p:nth-child(7)::text').extract_first()
    composer = selector.css(
        '#lrc_content > p:nth-child(8)::text').extract_first()
    song_obj.lyric = lyric
    song_obj.lyric_writer = _text_without_label(lric_author, u'词:')
    song_obj.compose_writer = _text_without_label(composer, u'曲:')
    # Persist the change; without this the success log below was misleading
    # and the "lyric already written" branch could never be reached.
    song_obj.save()
    logger.info('{} 写入歌词信息成功'.format(song_info))
def wrapper_query_to_df(self, dataset: bigquery.DatasetReference) -> None:
    """
    Wrapper function that takes a dataset as input, loops through the tables within the
    dataset, converts each BigQuery table to a dataframe and exports it as a CSV under
    the ``output`` directory (created if it does not exist).

    :type dataset: bigquery.DatasetReference
    :param dataset: the dataset reference object
    :rtype: None
    """
    import os  # local import so this method does not rely on a file-level one

    output_dir = "output"
    # Create the target directory up front so a missing folder cannot fail
    # the export after a table has already been downloaded.
    os.makedirs(output_dir, exist_ok=True)

    for table in self.get_table_list(dataset=dataset):
        sql = f"SELECT * FROM `{table.project}.{table.dataset_id}.{table.table_id}`"
        logger.info(f"Starting to collect data for {table.table_id}")
        response = self.bq_query_to_dataframe(sql)
        logger.info(f"Collected data for {table.table_id}")
        output = f"{output_dir}/{table.table_id}.csv"
        # index=True keeps the dataframe index as the first CSV column.
        response.to_csv(path_or_buf=output, index=True)
        logger.info(f"Exported data to {output}")
def add(x, y):
    """Log a one-item ``HotSongs`` query and return ``x + y``.

    The query result is only logged; it does not influence the return value.

    :param x: left operand.
    :param y: right operand.
    :return: ``x + y``
    """
    logger.info(HotSongs.objects().limit(1))
    return x + y
def on_success(self, retval, task_id, args, kwargs):
    """Log that the task finished, then delegate to the parent ``on_success``.

    :param retval: forwarded unchanged to the parent handler.
    :param task_id: identifier written to the log and forwarded to the parent.
    :param args: forwarded unchanged to the parent handler.
    :param kwargs: forwarded unchanged to the parent handler.
    :return: whatever the parent class's ``on_success`` returns.
    """
    message = "task {} done".format(task_id)
    logger.info(message)
    parent = super(ErTask, self)
    return parent.on_success(retval, task_id, args, kwargs)