Beispiel #1
0
def get_spots():
    """Scrape the spot listing of every city and store new spots in the sheet.

    Reads the "city" and "spot" sheets, then for each city row walks the
    paginated listing at ``BASE_URL + city['href'] + '?page=N'``. Every
    ``<li><a>`` found under ``<main>`` whose link text is not a known spot
    name is appended to the spot rows. The "spot" sheet is rewritten after
    each city so that progress survives a later failure.

    Pagination has no explicit end condition: the page loop stops when any
    step raises (for example ``soup.find("main")`` returning ``None`` so that
    ``find_all`` fails). The exception is printed and the next city starts.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    _, city_list = gspread.convert_to_dict_data(response)

    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "spot", "FORMULA")
    label_list, spot_list = gspread.convert_to_dict_data(response)
    spot_names = {spot.get('spot') for spot in spot_list}

    for city in city_list:
        num = 1
        new_num = 0
        while True:
            # Reset per page so the ``finally`` below never touches an
            # unbound name or the already-closed driver of a previous page
            # when get_driver() itself fails.
            driver = None
            try:
                driver = get_driver()
                page = '?page=%s' % (num)
                print(page)
                driver.get(BASE_URL + city['href'] + page)
                sleep(1)
                html_source = driver.page_source
                soup = BeautifulSoup(html_source, "lxml")

                main_tag = soup.find("main")
                list_tags = main_tag.find_all("li")
                for li in list_tags:
                    a_tag = li.find("a")

                    if not a_tag:
                        continue

                    spot = a_tag.text
                    if spot in spot_names:
                        continue

                    # Remember the name immediately so the same spot showing
                    # up again later in this run is not appended twice.
                    spot_names.add(spot)
                    spot_list.append({
                        'city': city['city'],
                        'spot': spot,
                        'page': num,
                        'href': a_tag.get('href'),
                    })
                    print("NEW!", spot)
                    new_num += 1

                num += 1

            except Exception as e:
                # Any failure ends pagination for this city (see docstring).
                pprint(e)
                break

            finally:
                if driver is not None:
                    driver.quit()

        print("NEW", new_num)
        values = gspread.convert_to_sheet_values(label_list, spot_list)
        body = {'values': values}
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'spot', body)

    print("SUCCESS!! get_spots")
Beispiel #2
0
def update_languages():
    """Detect the language(s) of every hashtag name and save them.

    Loads the "hashtag" sheet, runs ``detect_langs`` on each row's ``name``
    and stores the detected language codes, comma-joined, in the row's
    ``languages`` field. Rows for which detection raises are reported and
    left unchanged. The whole sheet is written back once at the end.
    """
    sheet = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag", "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(sheet)

    for row in hashtag_list:
        tag_name = row['name']
        print(tag_name)

        try:
            candidates = detect_langs(tag_name)
            languages = [candidate.lang for candidate in candidates]
            print(languages)
        except Exception as e:
            # Detection failed for this name: report it and keep the row as is.
            print(e)
            continue

        # ``row`` is the very dict stored in hashtag_list, so this updates
        # the list entry in place.
        row['languages'] = ','.join(languages)

    values = gspread.convert_to_sheet_values(label_list, hashtag_list)
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                {'values': values})
    print("SUCCESS!! update_languages")
Beispiel #3
0
def update_hashtag():
    """Merge freshly collected hashtags into the "hashtag" sheet.

    Loads the sheet rows, fetches new data with ``get_hashtag()`` and, for
    each fetched item, either updates the first existing row with the same
    ``name`` or appends the item as a new row. The rows are then sorted by
    ``num`` (missing or empty counts as 0) in descending order and written
    back.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                        "FORMULA")
    label_list, hashtag_list = gspread.convert_to_dict_data(response)

    data = get_hashtag()

    # name -> position of the FIRST row carrying that name. One pass here
    # replaces a linear scan of the whole list for every fetched item.
    index_by_name = {}
    for position, hashtag in enumerate(hashtag_list):
        index_by_name.setdefault(hashtag.get('name'), position)

    new_num = 0
    for d in data:
        name = d['name']
        index = index_by_name.get(name)

        if index is None:
            # Register the new row so a later duplicate in ``data`` updates
            # it instead of being appended again.
            index_by_name[name] = len(hashtag_list)
            hashtag_list.append(d)
            print("NEW!!:", d.get('page'), d.get('name'))
            new_num += 1
            continue

        hashtag_list[index].update(d)

    print("new:", new_num)
    hashtag_list = sorted(hashtag_list,
                          key=lambda k: k.get('num', 0) or 0,
                          reverse=True)
    body = {
        'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
    }
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
    print("SUCCESS!! update_hashtag")
Beispiel #4
0
def get_sheet_values_list(sheet_name):
    """Return the rows of ``sheet_name`` as a list of dicts, cached in Redis.

    The Redis entry stored under ``sheet_name`` is used when present and
    non-empty. Otherwise the sheet is fetched, converted to dict rows,
    stored as JSON with an expiry of ``EXPIRE`` and returned.
    """
    client = redis.from_url(REDIS_URL)
    cached = client.get(sheet_name)

    if not cached:
        # Cache miss: load from the sheet and populate the cache.
        sheet = gspread.get_sheet_values(SHEET_ID, sheet_name)
        _, rows = gspread.convert_to_dict_data(sheet)
        client.set(sheet_name, json.dumps(rows), ex=EXPIRE)
        return rows

    print("cache HIT!! %s" % (sheet_name))
    return json.loads(cached.decode())
Beispiel #5
0
def get_location_japan():
    """Scrape the Japanese location index and store new cities in the sheet.

    Reads the "city" sheet, then walks the paginated listing at
    ``BASE_URL + "/explore/locations/JP/" + '?page=N'``. Every ``<li><a>``
    found under ``<main>`` whose link text is not a known city name is
    appended to the city rows. The "city" sheet is rewritten once after
    pagination ends.

    Pagination has no explicit end condition: the loop stops when any step
    raises (for example ``soup.find("main")`` returning ``None`` so that
    ``find_all`` fails). The exception is printed and the rows collected so
    far are still saved.
    """
    response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "city", "FORMULA")
    label_list, city_list = gspread.convert_to_dict_data(response)
    city_names = {city.get('city') for city in city_list}

    url = "/explore/locations/JP/"
    print(url)

    num = 1
    while True:
        # Reset per page so the ``finally`` below never touches an unbound
        # name or the already-closed driver of a previous page when
        # get_driver() itself fails.
        driver = None
        try:
            driver = get_driver()
            page = '?page=%s' % (num)
            print("page:", num)
            driver.get(BASE_URL + url + page)
            sleep(1)
            html_source = driver.page_source
            soup = BeautifulSoup(html_source, "lxml")

            main_tag = soup.find("main")
            list_tags = main_tag.find_all("li")
            for li in list_tags:
                a_tag = li.find("a")

                if not a_tag:
                    continue

                city = a_tag.text
                if city in city_names:
                    continue

                # Remember the name immediately so the same city showing up
                # again later in this run is not appended twice.
                city_names.add(city)
                city_list.append({
                    'city': city,
                    'page': num,
                    'href': a_tag.get('href'),
                })
                print("NEW!", city)

            num += 1

        except Exception as e:
            # Any failure ends pagination (see docstring).
            pprint(e)
            break

        finally:
            if driver is not None:
                driver.quit()

    body = {'values': gspread.convert_to_sheet_values(label_list, city_list)}
    gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'city', body)
    print("SUCCESS!! get_location_japan")
Beispiel #6
0
def add_hashtag_detail():
    """Log in and fill in details for every hashtag row that has no ``num``.

    After logging in through the browser driver, each row of the "hashtag"
    sheet without a truthy ``num`` is merged with the result of
    ``get_hashtag_detail(driver, name)``. The sheet is saved after every 100
    rows actually processed, and once more, sorted by ``num`` descending,
    at the end.

    Any exception is printed and ends the run; the driver is always closed.
    """
    # Bound before the try so the ``finally`` is safe even if get_driver()
    # raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)

        usernameField = driver.find_element_by_xpath(usernamePath)
        usernameField.send_keys(INSTAGRAM_USERNAME)

        passwordField = driver.find_element_by_xpath(passwordPath)
        passwordField.send_keys(INSTAGRAM_PASSWORD)

        passwordField.send_keys(Keys.RETURN)
        sleep(30)
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                            "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        processed = 0
        for index, hashtag in enumerate(hashtag_list):

            # Show progress.
            if index % 100 == 0:
                print("index:", index)

            # Rows that already have a count need no scraping.
            if hashtag.get('num'):
                continue

            # ``hashtag`` is the dict stored in the list, so updating it
            # updates the list entry in place.
            hashtag.update(get_hashtag_detail(driver, hashtag['name']))
            processed += 1

            # Save every 100 processed rows. The check sits right after the
            # increment so a run of skipped rows cannot trigger the same
            # save over and over.
            if processed % 100 == 0:
                body = {
                    'values':
                    gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                            body)
                print("count:", processed)

        print("new:", processed)
        hashtag_list = sorted(hashtag_list,
                              key=lambda k: k.get('num', 0) or 0,
                              reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("SUCCESS!! add_hashtag_detail")

    except Exception as e:
        pprint(e)

    finally:
        if driver is not None:
            driver.quit()
Beispiel #7
0
def add_hashtag_list(limit=5):
    """Log in and add hashtags related to Japanese hashtags to the sheet.

    For the first ``limit`` rows of the "hashtag" sheet whose ``languages``
    field contains ``'ja'``, ``get_hashtag_detail(driver, name)`` is called
    and every name in its ``hashtag_set`` that is not yet in the sheet is
    appended as a new row (``name`` and ``update_at`` only). The sheet is
    saved after every 100 rows processed, and once more, sorted by ``num``
    descending, at the end.

    Args:
        limit: Number of leading sheet rows to inspect. Defaults to 5;
            ``None`` inspects every row present when the run starts.

    Any exception is printed and ends the run; the driver is always closed.
    """
    # Bound before the try so the ``finally`` is safe even if get_driver()
    # raises.
    driver = None
    try:
        driver = get_driver()

        # Login
        print("LOGIN START!!")
        driver.get(login_url)

        usernameField = driver.find_element_by_xpath(usernamePath)
        usernameField.send_keys(INSTAGRAM_USERNAME)

        passwordField = driver.find_element_by_xpath(passwordPath)
        passwordField.send_keys(INSTAGRAM_PASSWORD)

        passwordField.send_keys(Keys.RETURN)
        # NOTE(review): nothing waits for the login to complete before
        # scraping starts -- confirm get_hashtag_detail tolerates that.
        print("LOGIN FINISH!!")

        response = gspread.get_sheet_values(SHEET_ID_INSTAGRAM, "hashtag",
                                            "FORMULA")
        label_list, hashtag_list = gspread.convert_to_dict_data(response)

        # Names already in the sheet, for O(1) duplicate checks.
        known_names = {hashtag.get('name') for hashtag in hashtag_list}

        processed = 0
        new_num = 0
        # The slice is a copy, so appending to hashtag_list inside the loop
        # does not extend the iteration.
        for index, hashtag in enumerate(hashtag_list[:limit]):

            # Show progress.
            if index % 100 == 0:
                print("index:", index)

            # A missing or empty ``languages`` value means "not Japanese"
            # rather than an error that aborts the whole run.
            if 'ja' not in (hashtag.get('languages') or ''):
                continue

            data = get_hashtag_detail(driver, hashtag['name'])
            hashtag_set = data.get('hashtag_set', set())

            for new_tag in hashtag_set:
                if new_tag in known_names:
                    continue

                known_names.add(new_tag)
                hashtag_list.append({
                    'name': new_tag,
                    'update_at': data.get('update_at'),
                })
                print(new_tag)
                new_num += 1

            processed += 1

            # Save every 100 processed rows. The check sits right after the
            # increment so a run of skipped rows cannot trigger the same
            # save over and over.
            if processed % 100 == 0:
                body = {
                    'values':
                    gspread.convert_to_sheet_values(label_list, hashtag_list)
                }
                gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag',
                                            body)
                print("count:", processed)

        hashtag_list = sorted(hashtag_list,
                              key=lambda k: k.get('num', 0) or 0,
                              reverse=True)
        body = {
            'values': gspread.convert_to_sheet_values(label_list, hashtag_list)
        }
        gspread.update_sheet_values(SHEET_ID_INSTAGRAM, 'hashtag', body)
        print("new:", new_num)
        print("SUCCESS!! add_hashtag_list")

    except Exception as e:
        pprint(e)

    finally:
        if driver is not None:
            driver.quit()
Beispiel #8
0
def get_users_by_chache(params, sheet_name, expire=EXPIRE):
    """Return one filtered, sorted page of users from a sheet, cached in Redis.

    Args:
        params: Mapping of request options. Keys read here:
            ``sort`` -- field name to sort by, descending, as an integer;
            ``options`` -- iterable of filter values: ``'0'``/``'1'``/``'2'``
            filter on ``gender``, ``'バッジなし'`` keeps users without a
            ``custom_verify`` value, and anything else is matched against
            ``custom_verify``;
            ``page`` -- 1-based page number.
        sheet_name: Name of the sheet to read the users from.
        expire: Expiry passed to Redis as ``ex`` for the cached response.

    Returns:
        A dict with ``paging`` (from ``create_paging_data``) and
        ``user_list`` (the page of users, reduced to ``allowed_keys``).
    """
    print(params)
    # The sheet name is part of the key: the same params requested for two
    # different sheets must not share a cache entry.
    key = "%s:%s" % (sheet_name, params)

    r = redis.from_url(REDIS_URL)
    rcache = r.get(key)

    if rcache:
        print("cache HIT!! %s" % (key))
        return json.loads(rcache.decode())

    response = gspread.get_sheet_values(SHEET_ID, sheet_name)
    _, person_list = gspread.convert_to_dict_data(response)

    # Only users that have a share URL and at least one post are listed.
    person_list = [
        user for user in person_list
        if user.get('share_url') and int(user.get('aweme_count') or 0) > 0
    ]

    if params.get('sort'):
        person_list = sorted(person_list,
                             key=lambda k: int(k.get(params['sort'], 0) or 0),
                             reverse=True)

    gender = []
    account = []
    is_none = False
    for option in params.get('options') or []:
        if option in ['0', '1', '2']:
            gender.append(option)

        # Exact match: a substring test against the label would also accept
        # '' and any fragment of it.
        elif option == 'バッジなし':
            is_none = True

        else:
            account.append(option)

    if gender:
        person_list = [
            user for user in person_list if user.get('gender') in gender
        ]

    if account or is_none:
        # Keep users whose badge is one of the requested ones and, when
        # "no badge" was requested, also users without any badge.
        person_list = [
            user for user in person_list
            if user.get('custom_verify') in account
            or (is_none and not user.get('custom_verify'))
        ]

    for index, person in enumerate(person_list):
        person['index'] = index

    page = int(params['page']) if params.get('page') else None
    # Without a page the listing starts at the very first user, exactly
    # like page 1 does.
    start_num = PER_PAGE * (page - 1) if page else 0
    end_num = start_num + PER_PAGE

    result = []
    for user in person_list[start_num:end_num]:
        # Return only the allowed keys.
        data = {k: v for k, v in user.items() if k in allowed_keys}

        # Rewrite the thumbnail extension only when a thumbnail exists, so
        # a user without one does not fail the whole request.
        avatar_thumb = data.get('avatar_thumb')
        if avatar_thumb:
            data['avatar_thumb'] = avatar_thumb.replace('.webp', '.jpeg')
        result.append(data)

    response = {
        'paging': create_paging_data(len(person_list), page),
        'user_list': result,
    }

    r.set(key, json.dumps(response), ex=expire)

    return response