예제 #1
0
    def getComments(self, posts_dict, csv_array):
        """Download the images linked from the comments of each given post.

        For every post ID in ``posts_dict`` the comment listing is requested
        from ``self.url`` and at most ``self.comment_limit`` entries of it
        are examined.  A link ending in a known image extension is fetched
        directly; any other link (unless it ends in ``/``) is handed to
        ``Imgur.ImgurDownloader``.  Files go to ``./data/images/`` and one
        ``[name, 1]`` row per saved image is appended to ``csv_array``.

        Args:
            posts_dict: Mapping keyed by post ID; only the keys are read.
            csv_array: List that is extended in place with ``[name, 1]``
                rows.

        Returns:
            The same ``csv_array`` object that was passed in.
        """
        print(len(posts_dict), 'posts')
        image_suffixes = ('.jpg', '.png', '.gif', 'jpeg')
        filepath = './data/images/'
        for post_count, post_id in enumerate(posts_dict):
            resp = requests.get(self.url + '/comments/' + post_id +
                                '.json?depth=1',
                                headers={'User-agent': 'photoshopbot2'})
            if not resp.ok:
                print(resp.reason)
                continue

            resp_json = resp.json()
            print('Getting comments', post_count, '/', self.posts)
            # The comment entries are read from the second element of the
            # response.
            for count, comment in enumerate(resp_json[1]['data']['children']):
                # NOTE: the limit applies to comments examined, not to images
                # saved; the index from enumerate() cannot be "given back"
                # when a comment yields no image.
                if count >= self.comment_limit:  # count starts at 0
                    break
                try:
                    # Fourth piece of the body when split on double quotes;
                    # presumably the href of the first link -- TODO confirm.
                    comm_img = comment['data']['body_html'].rsplit('"')[3]
                    if comm_img.endswith(image_suffixes):
                        # Direct image link: fetch and store the bytes.
                        image = requests.get(comm_img,
                                             allow_redirects=True,
                                             stream=True)
                        filename = ('p_' + post_id +
                                    comm_img.rsplit("/", 1)[1])
                        os.makedirs(os.path.dirname(filepath + filename),
                                    exist_ok=True)
                        # Context manager so the handle is closed even if
                        # reading the response body fails.
                        with open(filepath + filename, 'wb') as image_file:
                            image_file.write(image.content)
                        csv_array.append([filename.split('.')[0], 1])
                    elif comm_img[-1] != '/':
                        # Any other link is delegated to the Imgur helper.
                        filename = ('p_' + post_id +
                                    comm_img.rsplit("/", 1)[1])
                        os.makedirs(os.path.dirname(filepath + filename),
                                    exist_ok=True)
                        try:
                            Imgur.ImgurDownloader(comm_img, filepath,
                                                  filename).save_images()
                            csv_array.append([filename, 1])
                        except Exception as err:
                            # Best effort: one failed download must not stop
                            # the scrape, but Ctrl-C still propagates.
                            print('Imgur download failed:', comm_img, err)
                except (IndexError, KeyError):
                    # Comment without a usable link: skip it.
                    continue
        #self.makeCSV(csv_array)
        return csv_array
예제 #2
0
def search_img(song_name):
    """Download the sheet image registered for ``song_name``.

    Prints ``song_name`` and its type, reads ``temp/music_list.csv``, takes
    the ``music_sheet_url`` of the first row whose ``music_name`` equals
    ``song_name`` and hands it to ``imgur_downloader.ImgurDownloader`` with
    the file name ``song_img``, saving into ``image``.
    """
    print(song_name)
    print(type(song_name))
    music_list = pd.read_csv('temp/music_list.csv', index_col=False)
    matching_rows = music_list.loc[music_list['music_name'] == song_name]
    # First match wins; an empty selection makes this lookup fail.
    sheet_url = matching_rows['music_sheet_url'].values[0]
    downloader = imgur_downloader.ImgurDownloader(imgur_url=sheet_url,
                                                  file_name='song_img')
    downloader.save_images('image')
예제 #3
0
 def search_img(self, song_name):
     """Fetch the sheet image registered for ``song_name`` into ``temp``.

     Calls ``dbx.files_download_to_file`` with ``temp/music_list.csv`` and
     ``/i_Piano/music_list.csv`` (presumably refreshing the local copy of
     the list -- confirm against the ``dbx`` client), reads the CSV, takes
     the ``music_sheet_url`` of the first row whose ``music_name`` equals
     ``song_name`` and downloads it through
     ``imgur_downloader.ImgurDownloader`` as ``dst_image`` into ``temp``.

     Raises:
         IndexError: If no row matches ``song_name`` (the first-element
             lookup on an empty selection fails).
     """
     dbx.files_download_to_file('temp/music_list.csv',
                                '/i_Piano/music_list.csv')
     df = pd.read_csv('temp/music_list.csv', index_col=False)
     img_url = df.loc[df['music_name'] == song_name][
         'music_sheet_url'].values[0]
     print(img_url)
     # Remove the image left by an earlier call.  A missing file (e.g. on
     # the very first run) is not an error and must not abort the download.
     try:
         os.remove('temp/dst_image.png')
     except FileNotFoundError:
         pass
     imgur_downloader.ImgurDownloader(
         imgur_url=img_url, file_name='dst_image').save_images('temp')
예제 #4
0
def get_pictures(posts, path, dir_name_length):
    """Download the pictures behind ``posts`` and return a running total.

    Each post's ``url`` is matched at character offset 8 (the length of
    ``"https://"``) against three host names:

    * ``i.redd.it``   -- passed to ``download_image``; its return value is
      added to the total.
    * ``v.redd.it``   -- skipped (videos are not downloaded).
    * ``i.imgur.com`` -- passed to ``imgur.ImgurDownloader``; the length of
      the first element returned by ``save_images()`` is added.

    Posts with any other URL contribute nothing.
    """
    total = 0
    for submission in posts:
        link = submission.url
        if link.startswith("i.redd.it", 8):
            total += download_image(link, path, dir_name_length)
        elif link.startswith("v.redd.it", 8):
            # Maybe in a future version that supports downloading videos
            pass
        elif link.startswith("i.imgur.com", 8):
            saved = imgur.ImgurDownloader(link, path).save_images()
            total += len(saved[0])
    return total
예제 #5
0
def getImages(url, search_filter, limit, posts_dict=None):
    """Download post images and the images linked in their comments.

    When ``posts_dict`` is empty or ``None`` the listing at
    ``url/search_filter`` is requested first: each post's ``url`` is saved
    as ``./data/original/p_<last path segment>`` and the post is recorded
    in a fresh dict.  Afterwards the first nine entries of every recorded
    post's comment listing are examined and the linked images are saved
    under ``./data/photoShopped/``.

    Args:
        url: Base URL that ``/<search_filter>/.json`` and
            ``/comments/<id>.json`` are appended to.
        search_filter: Listing path segment appended to ``url``.
        limit: Maximum number of posts read from the listing; also sent as
            the ``count`` query parameter.
        posts_dict: Optional mapping keyed by post ID.  When non-empty the
            listing request is skipped and only comments are processed.

    Returns:
        None.
    """
    if not posts_dict:  # If no posts are passed, get post IDs and Images
        print('Getting Posts')
        resp = requests.get(url + '/' + search_filter + '/.json?count=' +
                            str(limit),
                            headers={'User-agent': 'photoshopbot2'})
        if not resp.ok:
            print(resp.reason)
            return

        posts_dict = {}
        children = resp.json()['data']['children']
        # Slice instead of indexing with range(limit): the listing may hold
        # fewer than `limit` posts, which used to raise IndexError.
        for child in children[:max(limit, 0)]:
            post_id = child['data']['id']  # get ID
            post_img = child['data']['url']  # get img link
            posts_dict[post_id] = post_img  # add to Dict
            image = requests.get(post_img,
                                 allow_redirects=True,
                                 stream=True)  # Request image
            filename = ('./data/original' + '/p_' +
                        post_img.rsplit("/", 1)[1])
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            with open(filename, 'wb') as image_file:
                image_file.write(image.content)
        # Continue straight into the comment phase.  The former recursive
        # call never terminated when the listing produced no posts, because
        # the empty dict was treated as "no posts passed" again.

    # Get comment images for each post
    print('Getting Comments')
    image_suffixes = ('.jpg', '.png', '.gif', 'jpeg')
    target_dir = './data/photoShopped/'
    for post_id in posts_dict:
        resp = requests.get(url + '/comments/' + post_id + '.json?depth=1',
                            headers={'User-agent': 'photoshopbot2'})
        if not resp.ok:
            print(resp.reason)
            continue

        # Only the first nine entries of the comment listing are examined.
        for comment in resp.json()[1]['data']['children'][:9]:
            try:
                # Fourth piece of the body when split on double quotes;
                # presumably the href of the first link -- TODO confirm.
                comm_img = comment['data']['body_html'].rsplit('"')[3]
                if comm_img.endswith(image_suffixes):
                    # Direct image link: fetch and store the bytes.
                    image = requests.get(comm_img,
                                         allow_redirects=True,
                                         stream=True)  # Request image
                    filename = (target_dir + post_id +
                                comm_img.rsplit("/", 1)[1])
                    os.makedirs(os.path.dirname(filename), exist_ok=True)
                    with open(filename, 'wb') as image_file:
                        image_file.write(image.content)
                elif comm_img[-1] != '/':
                    # Any other link is delegated to the Imgur helper, which
                    # is given the target directory only.
                    filename = (target_dir + post_id +
                                comm_img.rsplit("/", 1)[1])
                    os.makedirs(os.path.dirname(filename), exist_ok=True)
                    try:
                        Imgur.ImgurDownloader(comm_img,
                                              target_dir).save_images()
                    except Exception as err:
                        # Best effort: one failed download must not stop the
                        # scrape, but Ctrl-C still propagates.
                        print('Imgur download failed:', comm_img, err)
            except (IndexError, KeyError):
                # Entry without a body or without a usable link: skip it.
                continue
예제 #6
0
def getImages(url, search_filter, limit, posts_dict=None):
    """Download post and comment images and write their label CSV.

    When ``posts_dict`` is empty or ``None`` the listing at
    ``url/search_filter`` is requested first: each post's ``url`` is saved
    as ``./data/images/o_<last path segment>`` and labelled ``0``.  Then
    the first four entries of every post's comment listing are examined
    and the linked images are saved as ``./data/images/p_<post id><name>``
    and labelled ``1``.  Finally all ``[file name, label]`` rows (comment
    rows first, then post rows) are written to ``./data/data_labels.csv``,
    replacing any previous content.

    If the listing request fails its reason is printed and the function
    returns without touching the CSV.

    Args:
        url: Base URL that ``/<search_filter>/.json`` and
            ``/comments/<id>.json`` are appended to.
        search_filter: Listing path segment appended to ``url``.
        limit: Maximum number of posts read from the listing; also sent as
            the ``count`` query parameter.
        posts_dict: Optional mapping keyed by post ID.  When non-empty the
            listing request is skipped and only comment rows are produced.

    Returns:
        None.
    """
    postArr = []
    # Was never initialised, so every call ended in a NameError.
    comment_arr = []
    if not posts_dict:  # If no posts are passed, get post IDs and Images
        print('Getting Posts')
        resp = requests.get(url + '/' + search_filter + '/.json?count=' +
                            str(limit),
                            headers={'User-agent': 'photoshopbot2'})
        if not resp.ok:
            print(resp.reason)
            return

        resp_json = resp.json()
        posts_dict = {}
        children = resp_json['data']['children']
        print(len(children))
        print(resp_json['data']['after'])
        # Slice instead of indexing with range(limit): the listing may hold
        # fewer than `limit` posts, which used to raise IndexError.
        for post_number, child in enumerate(children[:max(limit, 0)]):
            print(post_number)
            post_id = child['data']['id']  # get ID
            post_img = child['data']['url']  # get img link
            posts_dict[post_id] = post_img  # add to Dict
            image = requests.get(post_img,
                                 allow_redirects=True,
                                 stream=True)  # Request image
            image_name = 'o_' + post_img.rsplit("/", 1)[1]
            filepath = './data/images/' + image_name
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            with open(filepath, 'wb') as image_file:
                image_file.write(image.content)
            postArr.append([image_name, 0])
        # Continue straight into the comment phase.  The former recursive
        # call kept its own row lists, so post rows and comment rows could
        # never end up in the same CSV.

    # Get comment images for each post
    print('Getting Comments')
    image_suffixes = ('.jpg', '.png', '.gif', 'jpeg')
    filepath = './data/images/'
    for post_id in posts_dict:
        resp = requests.get(url + '/comments/' + post_id + '.json?depth=1',
                            headers={'User-agent': 'photoshopbot2'})
        if not resp.ok:
            print(resp.reason)
            continue

        # Only the first four entries of the comment listing are examined.
        for comment in resp.json()[1]['data']['children'][:4]:
            try:
                # Fourth piece of the body when split on double quotes;
                # presumably the href of the first link -- TODO confirm.
                comm_img = comment['data']['body_html'].rsplit('"')[3]
                if comm_img.endswith(image_suffixes):
                    # Direct image link: fetch and store the bytes.
                    image = requests.get(comm_img,
                                         allow_redirects=True,
                                         stream=True)  # Request image
                    filename = ('p_' + post_id +
                                comm_img.rsplit("/", 1)[1])
                    os.makedirs(os.path.dirname(filepath + filename),
                                exist_ok=True)
                    with open(filepath + filename, 'wb') as image_file:
                        image_file.write(image.content)
                    comment_arr.append([filename, 1])
                elif comm_img[-1] != '/':
                    # Any other link is delegated to the Imgur helper.
                    filename = ('p_' + post_id +
                                comm_img.rsplit("/", 1)[1])
                    os.makedirs(os.path.dirname(filepath + filename),
                                exist_ok=True)
                    try:
                        Imgur.ImgurDownloader(comm_img, filepath,
                                              filename).save_images()
                        comment_arr.append([filename, 1])
                    except Exception as err:
                        # Best effort: one failed download must not stop the
                        # scrape, but Ctrl-C still propagates.
                        print('Imgur download failed:', comm_img, err)
            except (IndexError, KeyError):
                # Entry without a body or without a usable link: skip it.
                continue

    csv_arr = comment_arr + postArr
    # The data directory may not exist yet when nothing was downloaded.
    os.makedirs('./data', exist_ok=True)
    with open("./data/data_labels.csv", "w", newline='') as my_csv:
        csvWriter = csv.writer(my_csv, delimiter=',')
        csvWriter.writerows(csv_arr)