Example #1
0
def scrape_images(profile, num, startdate, enddate, out_dir):
    """
    Function to scrape images from Instagram using Instalooter

    Parameters:
    ----------
        profile:    name of the Instagram profile.
        num:        number of images to download (converted with ``int``).
        startdate:  Most recent date from which you wanna start downloading.
        enddate:    Date to specify the end of timeframe.
        out_dir:    destination the images are downloaded to.

    Returns:
    --------

        output:     The path of the output directory (``out_dir`` unchanged)

    """
    looter = ProfileLooter(profile)
    # Download into the caller-supplied directory; the timeframe is passed as
    # (startdate, enddate), i.e. most recent date first.
    looter.download(out_dir, media_count=int(num), timeframe=(startdate, enddate))

    return out_dir
Example #2
0
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for a `ProfileLooter`."""

    def setUp(self):
        """Create a fresh looter and an in-memory destination filesystem."""
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        """Release the in-memory destination filesystem."""
        self.destfs.close()

    def _cookie_exists(self):
        """Tell whether the looter's cookie file is present in its cache filesystem."""
        return self.looter._cachefs.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it again."""
        # Before login: not logged in, no medias, no cookie file.
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Always log out, then check that the cookie file is gone.
            self.looter.logout()
            self.assertFalse(self._cookie_exists())

    def test_download(self):
        """A logged-in download writes ``test.jpg`` containing a JFIF marker."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            marker = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(marker, b'JFIF')
        finally:
            self.looter.logout()
Example #3
0
    def test_issue_041(self):
        """
        Videos-only download (feature request by @liorlior).

        Restricts the download to a single day and checks that the
        expected ``.mp4`` file ends up in the destination filesystem.
        """
        target_day = datetime.date(2017, 3, 10)
        video_looter = ProfileLooter(
            "nintendo", videos_only=True, session=self.session)
        video_looter.download(self.destfs, timeframe=[target_day, target_day])
        self.assertTrue(self.destfs.isfile("1467639884243493431.mp4"))
Example #4
0
async def instagram_dl(igdl):
    """Download recent media of an Instagram profile and upload it to the chat.

    The profile name is taken from the first group of ``igdl.pattern_match``.
    Up to five posts are downloaded into ``TEMP_DOWNLOAD_DIRECTORY``. Every
    file found in that directory afterwards that is not an ``.mp4`` is sent
    to ``igdl.chat_id`` as a document and then deleted from disk. Status and
    error messages are reported by editing the triggering message.

    Parameters:
    ----------
        igdl:   the triggering event; it must provide ``pattern_match``,
                ``edit``, ``client`` and ``chat_id``.

    A ``ValueError`` from the looter is reported as "account not found" and
    a ``RuntimeError`` as "account is private"; in both cases nothing is
    uploaded.
    """
    uname = igdl.pattern_match.group(1)
    download_dir = TEMP_DOWNLOAD_DIRECTORY
    os.makedirs(download_dir, exist_ok=True)

    try:
        await igdl.edit("`Getting info.....`")
        looter = ProfileLooter(uname)
        # Download into the configured directory itself (not into a folder
        # literally named "TEMP_DOWNLOAD_DIRECTORY") so that the scan below
        # actually finds the downloaded files.
        looter.download(download_dir, media_count=5)

    except ValueError:
        await igdl.edit(
            f"**Account {uname} Not Found.**\nPlease enter correct username.")
        return

    except RuntimeError:
        await igdl.edit(f"**Can't Catch Media.**\nAccount {uname} is Private.")
        return

    await igdl.edit("Processing ...")
    # Collect regular files only: a directory can neither be uploaded with
    # send_file nor deleted with os.remove.
    lst_of_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(download_dir)
        for name in names
    ]
    LOGS.info(lst_of_files)

    if not lst_of_files:
        await igdl.edit(
            "**No Media Found**\nSorry this account doesn't have any content")
        return

    await igdl.edit(
        "Found {} files. Uploading will start soon. Please wait!".format(
            len(lst_of_files)))
    for single_file in lst_of_files:
        if not os.path.exists(single_file):
            continue
        # Videos are neither uploaded nor removed by this command.
        if os.path.basename(single_file).lower().endswith(".mp4"):
            continue
        c_time = time.time()
        await igdl.client.send_file(
            igdl.chat_id,
            single_file,
            caption=f"[{uname}](https://instagram.com/{uname})",
            force_document=True,
            allow_cache=False,
            # Bind the current file and start time as defaults so a progress
            # task that runs late still reports on the right upload.
            progress_callback=lambda d, t, path=single_file, started=c_time: (
                asyncio.get_event_loop().create_task(
                    progress(d, t, igdl, started, "Uploading...", path))))
        os.remove(single_file)
Example #5
0
    def test_issue_041(self):
        """Videos-only download (feature request by @liorlior).

        With the looter's ``pages`` patched by ``MockPages``, a one-day
        download must leave exactly one ``.mp4`` file in the destination.
        """
        target_day = datetime.date(2017, 3, 10)
        looter = ProfileLooter("nintendo", videos_only=True, session=self.session)
        with contexter.Contexter() as stack:
            stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, timeframe=[target_day, target_day])
        downloaded = self.destfs.listdir("/")
        self.assertEqual(downloaded, ["1467639884243493431.mp4"])
Example #6
0
    def test_issue_014(self):
        """Custom filename templates (feature request by @JFLarsen).

        Downloads five medias with the ``{username}.{id}`` template and
        checks that every resulting name starts with ``nintendo.``.
        """
        looter = ProfileLooter(
            "nintendo", template="{username}.{id}", session=self.session)
        with contexter.Contexter() as stack:
            stack << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=5)
        for entry in self.destfs.scandir("/"):
            has_prefix = entry.name.startswith('nintendo.')
            self.assertTrue(has_prefix)
Example #7
0
    def test_issue_022(self):
        """
        Thanks to @kuchenmitsahne for reporting this bug.

        Checks that using ``{datetime}`` in the template does not put
        a Windows forbidden character in the filename.
        """
        # Raw string: the backslash is itself one of the forbidden characters,
        # and writing it unescaped in a normal literal is an invalid escape
        # sequence. The resulting set of characters is unchanged.
        FORBIDDEN = set(r'<>:"/\|?*')

        looter = ProfileLooter(
            "mysteryjets", template="{datetime}", session=self.session)
        looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            # The intersection is empty (falsy) when the name is clean.
            self.assertFalse(FORBIDDEN.intersection(f.name))
Example #8
0
    def test_issue_022(self):
        """
        Thanks to @kuchenmitsahne for reporting this bug.

        Checks that using ``{datetime}`` in the template does not put
        a Windows forbidden character in the filename.
        """
        # Raw string: the backslash is itself one of the forbidden characters,
        # and writing it unescaped in a normal literal is an invalid escape
        # sequence. The resulting set of characters is unchanged.
        FORBIDDEN = set(r'<>:"/\|?*')
        looter = ProfileLooter("nintendo", template="{datetime}", session=self.session)
        with contexter.Contexter() as ctx:
            # Replace the looter's page source with the mocked pages.
            ctx << mock.patch.object(looter, 'pages', MockPages('nintendo'))
            looter.download(self.destfs, media_count=5)
        for f in self.destfs.scandir("/"):
            # The intersection is empty (falsy) when the name is clean.
            self.assertFalse(FORBIDDEN.intersection(f.name))
Example #9
0
    def test_issue_014(self):
        """
        Custom filename templates (feature request by @JFLarsen).

        Downloads five medias with the ``{username}.{id}`` template and
        checks that every resulting name starts with ``mysteryjets``.
        """
        template_looter = ProfileLooter(
            "mysteryjets", template="{username}.{id}", session=self.session)
        template_looter.download(self.destfs, media_count=5)

        for entry in self.destfs.scandir("/"):
            has_prefix = entry.name.startswith('mysteryjets')
            self.assertTrue(has_prefix)
Example #10
0
    def test_issue_009(self):
        """
        Thanks to @kurtmaia for reporting this bug.

        Downloads ten medias with a profile looter that has
        ``add_metadata`` enabled and checks that every file carries
        non-empty ``Exif`` and ``0th`` EXIF sections.
        """
        metadata_looter = ProfileLooter(
            "fluoxetine", add_metadata=True, session=self.session)
        metadata_looter.download(self.destfs, media_count=10)

        for filename in self.destfs.listdir("/"):
            raw_image = self.destfs.getbytes(filename)
            tags = piexif.load(raw_image)
            self.assertTrue(tags['Exif'])  # Date & Caption
            self.assertTrue(tags['0th'])   # Image creator
Example #11
0
    def test_issue_066(self):
        """
        Thanks to @douglasrizzo for reporting this bug.

        Check that likescount and commentscount can be used
        in filename templates without causing the program to
        crash.
        """
        looter = ProfileLooter(
            "zuck", get_videos=True, add_metadata=True,
            template='{id}-{likescount}-{commentscount}',
            session=self.session)
        looter.download(self.destfs, media_count=10)
        for image in self.destfs.listdir("/"):
            # Raw pattern with an escaped dot, so only a literal "." is
            # accepted in front of the extension.
            self.assertRegex(image, r'[a-zA-Z0-9]*-[0-9]*-[0-9]*\.(jpg|mp4)')
def scrapeVideos(username = "",
                 password = "",
                 output_folder = "",
                 days = 1):
    """Download recent videos from every account that `username` follows.

    Logs in with instaloader to list the followees of `username`, then uses
    a videos-only `ProfileLooter` per followee to download at most 30 videos
    into `output_folder`. The time frame runs from today back `days` days.
    Progress is printed to stdout.
    """
    print("Starting Scraping")

    loader = instaloader.Instaloader()

    # Authenticate the loader, then list the accounts the user follows.
    loader.login(username, password)
    own_profile = instaloader.Profile.from_username(loader.context, username)
    following = own_profile.get_followees()
    print(following)

    for followee in following:
        acc = followee.username
        looter = ProfileLooter(
            acc,
            videos_only=True,
            template="{id}-{username}-{width}-{height}",
        )
        if not looter.logged_in():
            looter.login(username, password)
        print("Scraping From Account: " + acc)

        # Time frame is given most recent date first.
        today = datetime.date.today()
        oldest = today - dateutil.relativedelta.relativedelta(days=days)
        downloaded = looter.download(
            output_folder, media_count=30, timeframe=(today, oldest))
        print(f"Downloaded {downloaded!s} videos successfully")
        print()
Example #13
0
def _profile_images(username, destination='.instagram'):
    """Download new medias of `username` and list the destination directory.

    Logs in through `InstaLooter` first when no session is logged in, then
    downloads the profile with ``new_only=True`` into `destination`.

    Returns a list of `pathlib.Path` objects, one for every entry directly
    inside `destination`.
    """
    if not InstaLooter._logged_in():
        # NOTE(review): `password` is not a parameter of this function; it is
        # presumably a module-level name - confirm it is defined.
        InstaLooter._login(username, password)

    looter = ProfileLooter(username)
    # The download's return value is not needed here.
    looter.download(destination=destination, new_only=True)

    return list(pathlib.Path(destination).glob('*'))
Example #14
0
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for a `ProfileLooter`."""

    @classmethod
    def setUpClass(cls):
        """Open a shared session and copy its User-Agent onto `InstaLooter`."""
        cls.session = requests.Session()
        InstaLooter._user_agent = cls.session.headers["User-Agent"]

    @classmethod
    def tearDownClass(cls):
        """Close the shared session and drop the User-Agent override."""
        cls.session.close()
        del InstaLooter._user_agent

    def setUp(self):
        """Create a fresh looter and an in-memory destination filesystem."""
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        """Release the in-memory destination filesystem."""
        self.destfs.close()

    def _cookie_exists(self):
        """Tell whether the looter's cookie file is present in its cache filesystem."""
        return self.looter._cachefs.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        """Logging in creates the cookie file; logging out removes it again."""
        # Before login: not logged in, no medias, no cookie file.
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Always log out, then check that the cookie file is gone.
            self.looter.logout()
            self.assertFalse(self._cookie_exists())

    def test_download(self):
        """A logged-in download writes ``test.jpg`` containing a JFIF marker."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            marker = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(marker, b'JFIF')
        finally:
            self.looter.logout()
Example #15
0
 def post(self, target, number_media):
     """Download `number_media` medias of profile `target` into ``Pictures/<target>``.

     Returns the tuple ``("ok", 201)`` once the download call has returned.
     """
     profile_looter = ProfileLooter(target)
     destination = 'Pictures/' + target
     media_limit = int(number_media)
     profile_looter.download(destination, media_count=media_limit)
     return "ok", 201
Example #16
0
def download(username: str, image_dir: str) -> None:
    """Fetch the medias of `username` into `image_dir`.

    Files are named with the ``insta-{datetime}-{id}`` template; start and
    end of the download are logged at INFO level.
    """
    logging.info("Downloading...")
    profile_looter = ProfileLooter(username, template="insta-{datetime}-{id}")
    profile_looter.download(image_dir)
    logging.info("Done downloading.")
Example #17
0
from instalooter.looters import ProfileLooter
import datetime
import dateutil.relativedelta

# Download up to 50 videos posted by "daquan" during the last 28 days.

# Create the looter: videos only, with a custom file-name template.
looter = ProfileLooter(
    "daquan",
    videos_only=True,
    template="{id}-{username}-{width}-{height}",
)
looter.login("", "")

# Time frame as (most recent date, oldest date).
today = datetime.date.today()
thismonth = (
    today,
    today - dateutil.relativedelta.relativedelta(days=28),
)

looter.download(
    './Memes_December_4',
    media_count=50,
    timeframe=thismonth,
)
Example #18
0
 def test_profile(self, profile, **kwargs):
     """Download ``MEDIA_COUNT`` medias of `profile` and check at least that many files exist.

     Extra keyword arguments are forwarded to the `ProfileLooter` constructor.
     """
     profile_looter = ProfileLooter(profile, session=self.session, **kwargs)
     profile_looter.download(self.destfs, media_count=self.MEDIA_COUNT)
     downloaded = self.destfs.listdir("/")
     self.assertGreaterEqual(len(downloaded), self.MEDIA_COUNT)
Example #19
0
from instalooter.looters import ProfileLooter

# Account whose posts are fetched.
instagram_account_name = "eleanor3069"

# Download at most five posts of the account into ./subjectimg/.
looter = ProfileLooter(instagram_account_name)
looter.download(
    './subjectimg/',
    media_count=5,
)
print("Finished downloading photos")
Example #20
0
from instalooter.looters import ProfileLooter
from os import listdir
import pandas as pd

# Account whose newest post record is fetched.
instagram_account_name = "iampaintingrobot"

# Grab the newest Instagram post data and report which files in the record
# directory are not referenced by the CSV of processed images.

# Directory that holds the downloaded records and the CSV.
photorecord_dir = './instabotimagerecords/'

# Path of the CSV file inside the record directory.
photorecord_csv_dir = photorecord_dir + 'procdimagelist.csv'

# NOTE: the directory is listed BEFORE the download below, so anything the
# download writes is not part of this list. The list contains every entry of
# the directory, including the CSV file itself if it is stored there.
total_photo_list = listdir(photorecord_dir)

# NOTE(review): `dump_only=True` presumably stores post metadata instead of
# the media files - confirm against the instalooter documentation.
looter = ProfileLooter(instagram_account_name, dump_only=True)
looter.download(photorecord_dir, media_count=1)
print("Finished downloading photo data")

csvdf = pd.read_csv(photorecord_csv_dir)

# Keep the listed names that do not occur inside any value of the CSV's
# 'name' column (assumes the column holds strings, so `in` is a substring
# test - TODO confirm).
non_matching_file_list = [
    file_name for file_name in total_photo_list
    if not any(file_name in proc_name for proc_name in csvdf['name'])
]

print("Non matching files: ")
print(non_matching_file_list)

if len(non_matching_file_list) == 1:

    # Move the file if necessary
Example #21
0
def checkInsta(domain, foldername):
    """Download the medias of the Instagram profile `domain` into `foldername`."""
    profile_looter = ProfileLooter(domain)
    profile_looter.download(foldername)