Exemple #1
0
def search(timestamp, tag='photoslicebot'):
    '''
    Search Instagram for photo posts published under a hashtag.

    The hashtag explore page is requested as JSON, posts newer than
    ``timestamp`` that are not videos are selected, and each selected post is
    then fetched individually through ``ie.media``. A ``Post`` is built for
    every fetched post that carries a caption and has more than one child
    image in ``edge_sidecar_to_children``.

    :param timestamp: lower bound; only posts whose ``taken_at_timestamp`` is
        strictly greater than this value are considered.
    :param tag: hashtag name inserted into the explore URL.
    :return: list of ``Post`` objects for the matching posts (possibly empty).
    '''
    posts = []

    url = "https://www.instagram.com/explore/tags/%s/" % tag
    payload = {'__a': '1'}

    res = requests.get(url, params=payload).json()
    edges = res['graphql']['hashtag']['edge_hashtag_to_media']['edges']

    codes = []

    for edge in edges:
        # Original author's note: the posts arrive already sorted by date.
        node = edge['node']
        if node['taken_at_timestamp'] <= timestamp:
            continue
        if node['is_video']:
            log.info('CRAWLER: There is a video {}'.format(
                node['shortcode']))
            continue
        codes.append(node['shortcode'])

    for code in codes:
        # The fetch lives inside the try block so that one post failing to
        # load is logged and skipped instead of aborting the whole crawl.
        try:
            details = ie.media(code).data

            if 'edge_media_to_caption' not in details:
                # NOTE(review): this warning is tied to the missing caption
                # key, not to the image count checked below - confirm intent.
                log.warning('CRAWLER: Post {} isn\'t a panorama'.format(code))
                continue

            caption = details['edge_media_to_caption']
            if 'edges' not in caption:
                continue

            # A missing 'edge_sidecar_to_children' key raises KeyError here
            # and ends up in the error log below.
            children = details['edge_sidecar_to_children']['edges']
            if len(children) > 1:
                posts.append(Post(
                    username=details['owner']['username'],
                    caption=caption['edges'][0]['node']['text'],
                    location=details['location'],
                    urls=[child['node']['display_url'] for child in children],
                    date=details['taken_at_timestamp'],
                    code=code))

        except Exception as err:
            log.error('CRAWLER: Get post {0} info error {1}'.format(code, err))

    return posts
def Search(Query_List, Task_ID, Type, **kwargs):
    """Look up Instagram posts for each query and record the new ones.

    For the "User", "Tag" and "Location" types the matching explore call is
    made, its JSON dump is written through General.Main_File_Create, and up to
    ``Limit`` post URLs that are not yet cached are passed to
    General.Connections(...).Output. For the "Media" type the query itself is
    the post shortcode. Any other type only logs a warning.

    Only URLs that were actually emitted are written to the cache, so posts
    skipped because of the limit can still be reported by a later run.

    Args:
        Query_List: query or queries; normalised with General.Convert_to_List.
        Task_ID: forwarded unchanged to General.Connections.
        Type: one of "User", "Tag", "Location" or "Media".
        **kwargs: optional "Limit" - a positive value caps the number of new
            posts emitted per query; anything else falls back to 10.

    Returns:
        None.
    """
    Data_to_Cache = []

    # Fall back to 10 results unless a positive "Limit" was supplied.
    Limit = 10

    if kwargs.get('Limit') and int(kwargs["Limit"]) > 0:
        Limit = int(kwargs["Limit"])

    # str.strip('plugins.') removes any of those *characters* from both ends
    # of the name; drop the literal package prefix instead.
    Module_Name = __name__[len('plugins.'):] if __name__.startswith('plugins.') else __name__

    Directory = General.Make_Directory(Plugin_Name.lower())

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    Log_File = General.Logging(Directory, Plugin_Name.lower())
    handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    Cached_Data = General.Get_Cache(Directory, Plugin_Name)

    if not Cached_Data:
        Cached_Data = []

    Query_List = General.Convert_to_List(Query_List)

    # Key of the response payload that holds the post edges, per listing type.
    Edge_Keys = {
        "User": "edge_owner_to_timeline_media",
        "Tag": "edge_hashtag_to_media",
        "Location": "edge_location_to_media",
    }

    for Query in Query_List:

        if Type in ("User", "Tag", "Location"):
            Local_Plugin_Name = Plugin_Name + "-" + Type

            if Type == "User":
                CSE_Response = instagram_explore.user(Query)

            elif Type == "Tag":
                CSE_Response = instagram_explore.tag(Query)

            else:
                CSE_Response = location(Query)

            CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
            Output_file = General.Main_File_Create(Directory, Local_Plugin_Name, CSE_JSON_Output_Response, Query, ".json")
            Posts = CSE_Response[0][Edge_Keys[Type]]["edges"]
            Output_Connections = General.Connections(Query, Local_Plugin_Name, "instagram.com", "Data Leakage", Task_ID, Local_Plugin_Name.lower())
            Current_Step = 0

            for Post in Posts:

                if Current_Step >= Limit:
                    break

                Shortcode = Post["node"]["shortcode"]
                URL = "https://www.instagram.com/p/" + Shortcode + "/"

                # Already-known posts neither count towards the limit nor get
                # appended to the cache a second time.
                if URL in Cached_Data or URL in Data_to_Cache:
                    continue

                if Output_file:
                    Output_Connections.Output(Output_file, URL, General.Get_Title(URL))
                    Data_to_Cache.append(URL)

                Current_Step += 1

        elif Type == "Media":
            Local_Plugin_Name = Plugin_Name + "-" + Type
            CSE_Response = instagram_explore.media(Query)

            if CSE_Response:
                CSE_JSON_Output_Response = json.dumps(CSE_Response, indent=4, sort_keys=True)
                Output_file = General.Main_File_Create(Directory, Local_Plugin_Name, CSE_JSON_Output_Response, Query, ".json")
                URL = "https://www.instagram.com/p/" + Query + "/"

                if URL not in Cached_Data and URL not in Data_to_Cache:

                    if Output_file:
                        Output_Connections = General.Connections(Query, Local_Plugin_Name, "instagram.com", "Data Leakage", Task_ID, Local_Plugin_Name.lower())
                        Output_Connections.Output(Output_file, URL, General.Get_Title(URL))
                        Data_to_Cache.append(URL)

            else:
                logging.warning(General.Date() + " - " + Module_Name + " - Invalid response.")

        else:
            logging.warning(General.Date() + " - " + Module_Name + " - Invalid type provided.")

    # Append to an existing cache, otherwise start a fresh one.
    if Cached_Data:
        General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "a")

    else:
        General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "w")
def Search(Query_List, Task_ID, Type, **kwargs):
    """Look up Instagram posts for each query and store the new ones.

    For the "User", "Tag" and "Location" types the matching explore call is
    made and its JSON dump is written through General.Main_File_Create. Up to
    ``Limit`` post pages that are not yet cached are then downloaded, written
    with General.Create_Query_Results_Output_File and reported through
    General.Connections(...).Output. For the "Media" type the query itself is
    the post shortcode. Any other type only logs a warning.

    Every exception raised along the way is caught and logged as a warning.

    Args:
        Query_List: query or queries; normalised with General.Convert_to_List.
        Task_ID: forwarded unchanged to General.Connections.
        Type: one of "User", "Tag", "Location" or "Media".
        **kwargs: handed to General.Get_Limit to obtain the per-query limit.

    Returns:
        None.
    """
    # str.strip('plugins.') removes any of those *characters* from both ends
    # of the name; drop the literal package prefix instead.
    Module_Name = __name__[len("plugins."):] if __name__.startswith("plugins.") else __name__

    try:
        Data_to_Cache = []
        Directory = General.Make_Directory(Plugin_Name.lower())
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        Log_File = General.Logging(Directory, Plugin_Name.lower())
        handler = logging.FileHandler(os.path.join(Directory, Log_File), "w")
        handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter("%(levelname)s - %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # A falsy cache (presumably "no cache yet" - TODO confirm) becomes an
        # empty list so the membership tests below cannot fail on None.
        Cached_Data = General.Get_Cache(Directory, Plugin_Name) or []
        Query_List = General.Convert_to_List(Query_List)
        Limit = General.Get_Limit(kwargs)

        # Per listing type: response key holding the post edges, and the
        # category string passed to General.Connections.
        Listing_Types = {
            "User": ("edge_owner_to_timeline_media", "Social Media - Person"),
            "Tag": ("edge_hashtag_to_media", "Social Media - Person"),
            "Location": ("edge_location_to_media", "Social Media - Place"),
        }

        for Query in Query_List:

            if Type in ("User", "Tag", "Location"):
                Edge_Key, Category = Listing_Types[Type]
                Local_Plugin_Name = Plugin_Name + "-" + Type

                if Type == "User":
                    CSE_Response = instagram_explore.user(Query)

                elif Type == "Tag":
                    CSE_Response = instagram_explore.tag(Query)

                else:
                    CSE_Response = location(Query)

                CSE_JSON_Output_Response = json.dumps(CSE_Response,
                                                      indent=4,
                                                      sort_keys=True)
                Main_File = General.Main_File_Create(
                    Directory, Local_Plugin_Name, CSE_JSON_Output_Response,
                    Query, The_File_Extensions["Main"])
                Posts = CSE_Response[0][Edge_Key]["edges"]
                Output_Connections = General.Connections(
                    Query, Local_Plugin_Name, "instagram.com", Category,
                    Task_ID, Local_Plugin_Name.lower())
                Current_Step = 0

                for Post in Posts:

                    # Nothing is emitted once the limit is reached, so stop
                    # instead of walking the remaining posts.
                    if Current_Step >= int(Limit):
                        break

                    Shortcode = Post["node"]["shortcode"]
                    URL = f"https://www.instagram.com/p/{Shortcode}/"

                    if URL in Cached_Data or URL in Data_to_Cache:
                        continue

                    # The title is looked up only for posts that will be
                    # processed, not for cached ones.
                    Title = "IG | " + General.Get_Title(URL)
                    Response = requests.get(URL, headers=headers).text
                    Output_file = General.Create_Query_Results_Output_File(
                        Directory, Query, Local_Plugin_Name, Response,
                        Shortcode, The_File_Extensions["Query"])

                    if Output_file:
                        Output_Connections.Output([Main_File, Output_file],
                                                  URL, Title,
                                                  Plugin_Name.lower())
                        Data_to_Cache.append(URL)

                    else:
                        logging.warning(
                            f"{General.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                        )

                    Current_Step += 1

            elif Type == "Media":
                Local_Plugin_Name = Plugin_Name + "-" + Type
                CSE_Response = instagram_explore.media(Query)

                if not CSE_Response:
                    logging.warning(
                        f"{General.Date()} - {Module_Name} - Invalid response."
                    )
                    continue

                CSE_JSON_Output_Response = json.dumps(CSE_Response,
                                                      indent=4,
                                                      sort_keys=True)
                Main_File = General.Main_File_Create(
                    Directory, Local_Plugin_Name, CSE_JSON_Output_Response,
                    Query, The_File_Extensions["Main"])
                URL = f"https://www.instagram.com/p/{Query}/"

                if URL not in Cached_Data and URL not in Data_to_Cache:
                    Title = "IG | " + General.Get_Title(URL)
                    Response = requests.get(URL, headers=headers).text
                    # For a media search the query is the shortcode; the
                    # name Shortcode is never assigned on this path.
                    Output_file = General.Create_Query_Results_Output_File(
                        Directory, Query, Local_Plugin_Name, Response,
                        Query, The_File_Extensions["Query"])

                    if Output_file:
                        Output_Connections = General.Connections(
                            Query, Local_Plugin_Name, "instagram.com",
                            "Social Media - Media", Task_ID,
                            Local_Plugin_Name.lower())
                        Output_Connections.Output([Main_File, Output_file],
                                                  URL, Title,
                                                  Plugin_Name.lower())
                        Data_to_Cache.append(URL)

                    else:
                        logging.warning(
                            f"{General.Date()} - {Module_Name} - Failed to create output file. File may already exist."
                        )

            else:
                logging.warning(
                    f"{General.Date()} - {Module_Name} - Invalid type provided."
                )

        # Append to an existing cache, otherwise start a fresh one.
        if Cached_Data:
            General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "a")

        else:
            General.Write_Cache(Directory, Data_to_Cache, Plugin_Name, "w")

    except Exception as e:
        logging.warning(
            f"{General.Date()} - {Module_Name} - {str(e)}")
# Call tag_images() for the tag 'cat' and keep only the response's .data.
# NOTE(review): `ie` is only imported further down in this listing, so this
# line presumably belongs to a snippet whose own import is not visible here.
images = ie.tag_images('cat').data


import instagram_explore as ie

# Search by location id and print the response's data.
res = ie.location('213163910')
print(res.data)

# Next page: pass the cursor of the previous response back in; the result is
# unpacked into two names (data and cursor).
data, cursor = ie.location('213163910', res.cursor)

# Images only: call location_images() with the same id and keep just .data.
images = ie.location_images('213163910').data

import instagram_explore as ie

# Search by media shortcode and print the response's data.
res = ie.media('BFRO_5WBQfc')
print(res.data)

# Image only: call media_image() with the same shortcode and keep just .data.
image = ie.media_image('BFRO_5WBQfc').data




# Configure logging at INFO level, then run the example for the "palmeiras"
# hashtag.
# NOTE(review): `log` is presumably the logging module imported under an alias
# and HashTagSearchExample is defined outside this view - confirm.
log.basicConfig(level=log.INFO)
HashTagSearchExample().extract_recent_tag("palmeiras")

Exemple #5
0
def test_media():
    """Check the shape of the response returned by ie.media for a known code.

    The response must be a tuple whose ``data`` attribute is a dict and whose
    ``cursor`` attribute is None.
    """
    media_code = 'BFRO_5WBQfc'
    response = ie.media(media_code)

    assert isinstance(response, tuple)
    assert isinstance(response.data, dict)
    assert response.cursor is None