Beispiel #1
0
def getOneSongInfoCallback(result):
    """Store the song described by ``result`` unless it is already in the library.

    Only the first entry of ``result['data']`` is processed. The song is looked
    up by its ``mid`` (stored as ``strMediaMid``); when no match exists, the
    singers and the album it references are looked up and created if missing,
    and a new ``Song`` linking to them is saved.

    :param result: decoded response mapping; reads the ``data`` and ``singer``
        keys.
    :return: None
    """
    data = result.get('data', [])
    if not data:
        return

    data = data[0]
    albums_info = data.get('album', {})
    song_name = data.get('name', '')
    song_url = data.get('url', '')
    song_mid = data.get('mid', '')
    song_qq_id = data.get('id', '')

    song_query_result = Song.objects(**{'strMediaMid': song_mid})
    if len(song_query_result):
        logger.info('{} 歌曲库已经存在'.format(song_name))
        return

    logger.info('{} 歌曲信息缺失'.format(song_name))
    # NOTE(review): singers are read from the top-level response, while every
    # other field comes from data[0] -- confirm this is the right level.
    singers_info = result.get('singer', [])
    album_mid = albums_info.get('mid', '')
    album_qq_id = albums_info.get('id', '')
    album_name = albums_info.get('name', '')

    album_singers = []
    for singer_info in singers_info:
        singer_name = singer_info.get('name', '')
        singer_qq_id = singer_info.get('id', '')
        singer_mid = singer_info.get('mid', '')
        query_result = Singer.objects(**{'qq_id': singer_qq_id})
        if len(query_result):
            singer_obj = query_result[0]
        else:
            logger.info('{} 歌手信息缺失'.format(singer_name))
            singer_obj = Singer()
            singer_obj.qq_id = singer_qq_id
            singer_obj.mid = singer_mid
            singer_obj.name = singer_name
            # Persist the new singer before the album/song refer to it;
            # previously it was built and then silently discarded.
            singer_obj.save()
        # Both existing and newly created singers belong to the song/album.
        album_singers.append(singer_obj)

    album_query_result = Album.objects(**{'mid': album_mid})
    if len(album_query_result):
        album_obj = album_query_result[0]
    else:
        logger.info('{} 专辑信息缺失'.format(album_name))
        album_obj = Album()
        album_obj.name = album_name
        album_obj.mid = album_mid
        album_obj.qq_id = album_qq_id
        album_obj.singer = album_singers
        album_obj.desc = ''
        album_obj.save()

    # NOTE(review): song_name is only used for logging and is not stored on
    # the Song -- confirm whether the model has a name field to fill.
    song_obj = Song()
    song_obj.strMediaMid = song_mid
    song_obj.qq_id = song_qq_id
    song_obj.play_url = song_url
    song_obj.singers = album_singers
    song_obj.album = album_obj
    song_obj.save()
    logger.info('{} 歌曲增加成功'.format(song_name))
Beispiel #2
0
def run():
    """Run the BigQuery extraction for one dataset and log how long it took.

    Builds a ``BigQueryDataExtractor`` for the ``ace-digit-277918`` project,
    calls its ``wrapper_query_to_df`` on the ``world_bank_wdi`` dataset, and
    logs the elapsed wall-clock time divided by 60 (i.e. in minutes).
    """
    started_at = timeit.default_timer()

    extractor = BigQueryDataExtractor(project="ace-digit-277918")
    extractor.wrapper_query_to_df(dataset="world_bank_wdi")

    elapsed_minutes = (timeit.default_timer() - started_at) / 60
    logger.info(f"Program taken: {elapsed_minutes} to execute")
Beispiel #3
0
    def get_table_list(self, dataset: bigquery.DatasetReference) -> list:
        """ Return the tables of ``dataset`` as a list.

        The tables are requested through ``self.public_client.list_tables`` and
        the result is materialised into a list after the retrieval is logged.

        :type dataset: bigquery.DatasetReference
        :param dataset: the dataset reference whose tables should be listed

        :rtype: list """

        table_iterator = self.public_client.list_tables(dataset=dataset)
        logger.info(f"Retrieved tables in {dataset}")

        tables = list(table_iterator)
        return tables
Beispiel #4
0
    def extract_lyic(self, html, mid):
        """Parse a lyric page and store the lyric and its credits on the song.

        The song is looked up by ``strMediaMid == mid``. When it exists and has
        no lyric yet, the lyric text, lyric writer and composer taken from the
        ``#lrc_content`` element are written to it and the song is saved.

        :param html: markup of the lyric page.
        :param mid: media mid identifying the song.
        :return: None
        """
        def text_of(css_query):
            # extract_first() yields None when nothing matches; fall back to ''
            # so a page with fewer paragraphs does not raise AttributeError.
            return (selector.css(css_query).extract_first() or '').strip('\n')

        song_query_result = Song.objects(**{'strMediaMid': mid})
        selector = Selector(text=html)

        lyric_html = selector.css('#lrc_content').extract_first()
        if lyric_html is None:
            # Previously this raised IndexError on pages without a lyric block.
            logger.info('no #lrc_content element found for song {}'.format(mid))
            return
        lyric = remove_tags(lyric_html).strip('\n').strip('\r').strip(' ')

        song_info = text_of('#lrc_content > p:nth-child(6)::text')
        if not len(song_query_result):
            logger.info('写入歌词时,未查询到 {} 歌曲 信息'.format(song_info))
            return

        song_obj = song_query_result[0]
        if song_obj.lyric:
            logger.info(u'%s 歌词 已经写入' % song_info)
            return

        # NOTE(review): lstrip() removes any leading run of the given
        # characters, not the literal label -- kept as in the original.
        lyric_writer = text_of(
            '#lrc_content > p:nth-child(7)::text').lstrip(u'词:')
        compose_writer = text_of(
            '#lrc_content > p:nth-child(8)::text').lstrip(u'曲:')

        song_obj.lyric = lyric
        song_obj.lyric_writer = lyric_writer
        song_obj.compose_writer = compose_writer
        # Persist the change; without save() the assignments above were lost
        # even though success was logged.
        song_obj.save()
        logger.info('{} 写入歌词信息成功'.format(song_info))
Beispiel #5
0
    def wrapper_query_to_df(self, dataset: bigquery.DatasetReference) -> None:
        """ Export every table of ``dataset`` to a CSV file under ``output/``.

        Lists the dataset's tables via ``get_table_list``, runs a
        ``SELECT *`` for each one through ``bq_query_to_dataframe`` and writes
        the result to ``output/<table_id>.csv`` (index column included). The
        ``output`` directory is created if it does not exist yet.

        :type dataset: bigquery.DatasetReference
        :param dataset: the dataset reference object

        :rtype: None """
        import os  # local import keeps this method self-contained

        table_names = self.get_table_list(dataset=dataset)

        # to_csv does not create missing parent directories, so make sure the
        # target directory exists once, before the first export.
        output_dir = "output"
        os.makedirs(output_dir, exist_ok=True)

        for table in table_names:
            sql = f"""SELECT * FROM `{table.project}.{table.dataset_id}.{table.table_id}`"""

            logger.info(f"Starting to collect data for {table.table_id}")
            response = self.bq_query_to_dataframe(sql)
            logger.info(f"Collected data for {table.table_id}")

            output = f"{output_dir}/{table.table_id}.csv"
            response.to_csv(path_or_buf=output, index=True)
            logger.info(f"Exported data to {output}")
Beispiel #6
0
def add(x, y):
    """Log a one-document ``HotSongs`` query and return ``x + y``."""
    first_hot_song = HotSongs.objects().limit(1)
    logger.info(first_hot_song)
    total = x + y
    return total
Beispiel #7
0
 def on_success(self, retval, task_id, args, kwargs):
     """Log that the task finished, then delegate to the parent ``on_success``.

     Returns whatever the parent implementation returns.
     """
     message = "task {} done".format(task_id)
     logger.info(message)
     parent = super(ErTask, self)
     return parent.on_success(retval, task_id, args, kwargs)