Exemple #1
0
async def fetch_blog_post_published_count(keyword, start_date: datetime.date, end_date: datetime.date):
    """
    Fetch how many Naver blog posts match a keyword within a date range.

    Sends a GET request to the Naver blog section search endpoint with
    ``keyword`` and the ISO-formatted ``start_date`` / ``end_date``, then
    returns the ``result.totalCount`` field of the JSON answer as an ``int``.
    """
    search_url = 'https://section.blog.naver.com/ajax/SearchList.nhn'

    query = {
        'countPerPage': '7',
        'currentPage': '1',
        'keyword': keyword,
        'orderBy': 'sim',
        'type': 'post',
        'startDate': start_date.isoformat(),
        'endDate': end_date.isoformat(),
    }

    # Browser-like request headers sent along with the search query.
    browser_headers = {
        'authority': 'section.blog.naver.com',
        'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
        'accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://section.blog.naver.com/Search/Post.nhn?pageNo=1&rangeType=ALL&orderBy=sim&keyword=%ED%8F%BC%ED%81%B4%EB%A0%8C%EC%A7%95',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
    }

    async with aiohttp.ClientSession() as session, \
            session.get(search_url, headers=browser_headers, params=query) as response:
        body_text = await response.text()

    # The body does not start with JSON: the first 6 characters are skipped
    # before parsing.
    # NOTE(review): presumably a fixed non-JSON prefix added by the server;
    # the length is hard-coded, so confirm it if parsing starts failing.
    parsed = json.loads(body_text[6:])

    # Published post count for the requested period.
    return int(parsed['result']['totalCount'])
Exemple #2
0
async def fetch_category_shopping_trending_keywords(category_id, start_date: datetime.date, end_date: datetime.date):
    """Fetch the keyword ranking of a Naver DataLab shopping category.

    Posts ``category_id`` and the ISO-formatted ``start_date`` / ``end_date``
    to the DataLab ``getCategoryKeywordRank`` endpoint (page 1, 100 entries,
    no age/gender/device restriction) and returns the ``ranks`` entry of the
    JSON response.

    The HTTP call is made with the synchronous ``requests`` library, so it is
    run on the event loop's default executor; calling it directly inside this
    coroutine would block every other task until the response arrived.
    """
    import asyncio
    import functools

    headers = {
        'authority': 'datalab.naver.com',
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        'accept': '*/*',
        'x-requested-with': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'origin': 'https://datalab.naver.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://datalab.naver.com/shoppingInsight/sCategory.naver',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
    }

    form = {
        'cid': category_id,
        'timeUnit': 'date',
        'startDate': start_date.isoformat(),
        'endDate': end_date.isoformat(),
        'age': '',
        'gender': '',
        'device': '',
        'page': '1',
        'count': '100'
    }

    # Bind the arguments now; run_in_executor only accepts positional args.
    send_request = functools.partial(
        requests.post,
        'https://datalab.naver.com/shoppingInsight/getCategoryKeywordRank.naver',
        headers=headers,
        data=form,
    )
    response = await asyncio.get_running_loop().run_in_executor(None, send_request)

    return response.json()['ranks']
Exemple #3
0
async def fetch_keyword_graph_statistics(keyword, category_id, time_unit: TimeUnit, start_date: datetime.date, end_date: datetime.date):
    """Collect the data series used to draw graphs for a shopping keyword.

    Posts the same form to three Naver DataLab shopping-insight endpoints,
    one after another over a single session, and returns a dict with the
    keys ``clickTrend``, ``genderRate`` and ``ageRate``. Each value is the
    ``result[0].data`` entry of the corresponding JSON response.
    """
    # (key in the returned dict, endpoint URL), requested in this order.
    # A device-rate endpoint (getKeywordDeviceRate.naver) also appears in the
    # original code but is disabled there, so it is not requested here.
    endpoints = (
        ('clickTrend', 'https://datalab.naver.com/shoppingInsight/getKeywordClickTrend.naver'),
        ('genderRate', 'https://datalab.naver.com/shoppingInsight/getKeywordGenderRate.naver'),
        ('ageRate', 'https://datalab.naver.com/shoppingInsight/getKeywordAgeRate.naver'),
    )

    request_headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
        'referer': 'https://datalab.naver.com/shoppingInsight/sKeyword.naver',
        'origin': 'https://datalab.naver.com'
    }

    form = {
        'cid': category_id,  # category id
        # Time unit: date / week / month.
        # NOTE(review): the TimeUnit object is sent as-is here, while
        # fetch_relative_ratio sends time_unit.value -- confirm which is intended.
        'timeUnit': time_unit,
        'startDate': start_date.isoformat(),
        'endDate': end_date.isoformat(),
        'age': '',  # empty = no restriction; e.g. '30,40' (10/20/30/40/50/60, comma separated)
        'gender': '',  # empty = no restriction; 'f', 'm' or 'f,m'
        'device': '',  # empty = no restriction; 'pc', 'mo' or 'pc,mo'
        'keyword': keyword
    }

    statistics = {}
    async with aiohttp.ClientSession() as session:
        for stat_name, endpoint_url in endpoints:
            async with session.post(endpoint_url, headers=request_headers, data=form) as response:
                # content_type=None: decode the body as JSON without checking
                # the response's declared content type.
                decoded = await response.json(content_type=None)
                statistics[stat_name] = decoded['result'][0]['data']

    return statistics
Exemple #4
0
async def fetch_relative_ratio(keywords: List[str], start_date: datetime.date,
                               end_date: datetime.date, time_unit: TimeUnit):
    """Fetch search-trend data for each keyword from the Naver DataLab API.

    Every keyword is sent as its own keyword group (group name equal to the
    keyword). The request covers ``start_date`` to ``end_date`` (ISO format)
    at the granularity given by ``time_unit.value``.

    Returns a list with one dict per entry of the response's ``results``:
    ``{'keyword': <first keyword of the group>, 'data': <the entry's data>}``.
    A sample ``data`` list is::

        [{"period": "2021-05-01", "ratio": 93.98417},
         {"period": "2021-06-01", "ratio": 100}]

    The HTTP call uses the synchronous ``requests`` library, so it is run on
    the event loop's default executor instead of blocking the loop.
    """
    import asyncio
    import functools

    # NOTE(review): API credentials are hard-coded in source. They are kept
    # unchanged to preserve behavior, but should be moved to configuration
    # and rotated.
    headers = {
        'X-Naver-Client-Id': '8VcP69maRqven9qJWV1b',
        'X-Naver-Client-Secret': 'BH21bBnIJz'
    }

    body = {
        'startDate': start_date.isoformat(),
        'endDate': end_date.isoformat(),
        'timeUnit': time_unit.value,
        'keywordGroups': [
            {'groupName': keyword, 'keywords': [keyword]}
            for keyword in keywords
        ],
    }

    # Bind the arguments now; run_in_executor only accepts positional args.
    send_request = functools.partial(
        requests.post,
        "https://openapi.naver.com/v1/datalab/search",
        headers=headers,
        data=json.dumps(body),
    )
    response = await asyncio.get_running_loop().run_in_executor(None, send_request)

    groups = response.json()['results']
    return [
        {'keyword': group['keywords'][0], 'data': group['data']}
        for group in groups
    ]
Exemple #5
0
def list_feriados(dataInicial: datetime.date, dataFinal: datetime.date):
    """List the holiday entities dated between two dates, inclusive.

    Queries the datastore kind ``TipoEntidade.FERIADOS.value`` for entities
    whose ``dt_feriado`` lies between the ISO-formatted ``dataInicial`` and
    ``dataFinal`` (both bounds included), ordered by ``dt_feriado``. Each
    entity is passed through ``from_datastore`` and the results are returned
    as a list.
    """
    # Datastore client and a query over the holiday kind.
    query = get_client().query(kind=TipoEntidade.FERIADOS.value)

    # Inclusive date range; the bounds are compared as ISO-format strings.
    lower_bound = dataInicial.isoformat()
    query.add_filter('dt_feriado', '>=', lower_bound)
    upper_bound = dataFinal.isoformat()
    query.add_filter('dt_feriado', '<=', upper_bound)

    # Ascending by holiday date.
    query.order = ['dt_feriado']

    # Run the query and convert each returned entity.
    return builtin_list(from_datastore(entity) for entity in query.fetch())
 def _do_validate(self, validated_date: datetime.date) -> bool:
     """Check that a date's ISO string parses with this class's date pattern.

     Returns ``True`` when ``validated_date`` is falsy (nothing to check) or
     when ``validated_date.isoformat()`` can be parsed by ``strptime`` using
     ``self.__DATE_PATTERN``; returns ``False`` when parsing raises
     ``ValueError``.
     """
     # Nothing to validate: treated as valid.
     if not validated_date:
         return True

     iso_text = validated_date.isoformat()
     try:
         datetime.strptime(iso_text, self.__DATE_PATTERN)
     except ValueError:
         return False
     else:
         return True
Exemple #7
0
def crawl_library_group_room_availability(session: WebSession,
                                          date: datetime.date):
    """Fetch and parse the library group-room availability for one date.

    Posts the availability form, with ``date`` in ISO format, to
    ``urls.LIBRARY_GROUP_ROOMS`` through ``session`` and returns whatever
    ``parser.get_library_group_room_availability`` extracts from the page.
    """
    # Form fields submitted with the request.
    form_fields = {
        'submit:reservas:es': 'Ver+disponibilidade',
        'data': date.isoformat(),
    }
    availability_page = session.get_broken_simplified_soup(
        urls.LIBRARY_GROUP_ROOMS,
        post_data=form_fields,
    )
    return parser.get_library_group_room_availability(availability_page)
Exemple #8
0
def list_indices(indexador: str, dataInicial: datetime.date,
                 dataFinal: datetime.date):
    """List the index entities of one indexer within a reference-date range.

    Queries the datastore kind ``TipoEntidade.INDICES.value`` for entities
    whose ``tp_indice`` equals ``indexador`` and whose ``dt_referencia`` lies
    between the ISO-formatted ``dataInicial`` and ``dataFinal`` (both bounds
    included), ordered by ``dt_referencia``. Each entity is passed through
    ``from_datastore`` and the results are returned as a list.
    """
    # Datastore client and a query over the index kind.
    query = get_client().query(kind=TipoEntidade.INDICES.value)

    # Restrict to the requested indexer.
    query.add_filter('tp_indice', '=', indexador)

    # Inclusive reference-date range; the bounds are compared as ISO-format strings.
    lower_bound = dataInicial.isoformat()
    query.add_filter('dt_referencia', '>=', lower_bound)
    upper_bound = dataFinal.isoformat()
    query.add_filter('dt_referencia', '<=', upper_bound)

    # Ascending by reference date.
    query.order = ['dt_referencia']

    # Run the query and convert each returned entity.
    return builtin_list(from_datastore(entity) for entity in query.fetch())
    def remove_old_ts(self, output_dir, before_date: datetime.date):
        """Back up and then delete old ``HouseTS`` rows, page by page.

        Every ``HouseTS`` row whose ``created`` value is earlier than the day
        after ``before_date`` is written out through ``self.dump_row`` and
        then deleted. Rows are handled ``self.items_per_page`` at a time, and
        a percentage progress line is rewritten in place on ``self.stdout``.

        Args:
            output_dir: Base directory handed to ``self.dump_row``.
            before_date: Cut-off date; one day is added before filtering.

        ``self.stdout.ending`` is switched to ``''`` while the progress line
        is drawn and is always reset to ``'\\n'`` afterwards, including when
        dumping or deleting a row raises.
        """
        # The filter below is a strict "less than", so the bound is moved one
        # day forward before it is applied.
        before_date += timedelta(1)
        old_houses = HouseTS.objects.filter(
            created__lt=before_date
        )

        total_house = old_houses.count()
        total_done = 0
        # Starts non-zero so the first page is always attempted.
        done_this_ite = 1

        # No automatic line ending, so "\r" can redraw the progress line.
        self.stdout.ending = ''
        try:
            self.stdout.write("[HouseTS] Start to backup {} rows before {}.\n".format(
                total_house,
                before_date.isoformat()
            ))

            while done_this_ite:
                # Handled rows are deleted, so re-running the same query with
                # a slice always yields the next unprocessed page.
                old_houses = HouseTS.objects.filter(
                    created__lt=before_date
                )[:self.items_per_page]
                done_this_ite = 0

                for house in old_houses:
                    # Dumps are grouped by creation date: <ts_dir>/YYYY/MM/DD.
                    sub_dir = '{}/{:04d}/{:02d}/{:02d}'.format(
                        self.ts_dir,
                        house.created.year,
                        house.created.month,
                        house.created.day
                    )
                    filename = 'house.{}.{}.json'.format(house.vendor.name, house.vendor_house_id)
                    self.dump_row(
                        base_dir=output_dir,
                        sub_dir=sub_dir,
                        filename=filename,
                        house=house
                    )
                    # Delete only after the row has been dumped.
                    house.delete()
                    total_done += 1
                    done_this_ite += 1
                    self.stdout.write("\r[HouseTS] {:3.0f}% done".format(
                        100 * total_done / total_house
                    ))

            self.stdout.write("\n[HouseTS] done!\n")
        finally:
            # Reset the line ending even when a dump or delete fails, so
            # later output from the command is not run together.
            self.stdout.ending = '\n'
Exemple #10
0
def list_indexadores(dt_referencia: datetime.date = None,
                     tipo_atualizacao: str = None):
    """List indexer entities, optionally filtered, ordered by last reference date.

    Queries the datastore kind ``TipoEntidade.INDEXADORES.value``. When
    ``dt_referencia`` is given, only entities whose ``dt_ult_referencia`` is
    strictly before its ISO-formatted value are kept. When
    ``tipo_atualizacao`` is given, only entities whose ``tipo_atualizacao``
    equals it are kept. Results are ordered by ``dt_ult_referencia``, passed
    through ``from_datastore`` and returned as a list.
    """
    # Datastore client and a query over the indexer kind.
    datastore = get_client()
    query = datastore.query(kind=TipoEntidade.INDEXADORES.value)

    # Each filter is applied only when its argument was supplied.
    if dt_referencia is not None:
        cutoff = dt_referencia.isoformat()
        query.add_filter('dt_ult_referencia', '<', cutoff)
    if tipo_atualizacao is not None:
        query.add_filter('tipo_atualizacao', '=', tipo_atualizacao)

    # Ascending by last reference date.
    query.order = ['dt_ult_referencia']

    # Run the query and convert each returned entity.
    return builtin_list(from_datastore(entity) for entity in query.fetch())