import os

from google_images_download import google_images_download


def download_matting_dataset(output_dir):

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    response = google_images_download.googleimagesdownload()
    response.download({
        "keywords": "portrait transparent background",
        "color_type": "transparent",
        "size": "medium",
        "limit": 500,
        "output_directory": output_dir,
        "chromedriver": "/usr/local/bin/chromedriver"})

    response = google_images_download.googleimagesdownload()
    response.download({
        "keywords": "texture background",
        "color_type": "full-color",
        "size": "medium",
        "limit": 500,
        "output_directory": output_dir,
        "chromedriver": "/usr/local/bin/chromedriver"})
Example #2
# -*- coding: utf-8 -*-
""" dl.py

	A module to search google and download images corresponding 
	to search terms. From:

		https://github.com/hardikvasa/google-images-download
"""

from google_images_download import google_images_download

response = google_images_download.googleimagesdownload()

args = ['french identity card']


def run():
    for arg in args:
        absolute_image_paths = response.download({
            'keywords': arg,
            # More than 100 images requires `chromedriver`.
            # Download: https://sites.google.com/a/chromium.org/chromedriver/downloads (link live 8/30/18)
            'limit': 200,
            'chromedriver': 'C:/apps/chromedriver/chromedriver.exe',
            'proxy': 'fr-proxy.groupinfra.com:3128'
        })
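A minimal entry-point sketch for running the module above directly as a script:

if __name__ == "__main__":
    run()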

def downloadFiles(name):
    response = google_images_download.googleimagesdownload()  # class instantiation
    arguments = {"keywords": name, "limit": 50, "print_urls": True,
             'usage_rights': "labeled-for-reuse-with-modifications", "format": 'jpg'}  # creating list of arguments
    paths = response.download(arguments)  # passing the arguments to the function
Example #4
from google_images_download import google_images_download  # * libraries
import sys

# * required arguments
busca = input("Search term: ")
limit = int(input("Number of images to fetch: "))  # cast so the limit is numeric
formato = input("Desired format (jpg, gif, png, bmp, svg, webp, ico, raw): ")

response = google_images_download.googleimagesdownload()  # * class instantiation

# * arguments
arguments = {
    "keywords": busca,
    "limit": limit,
    "print_urls": True,
    "delay": 1,
    "output_directory": "imagens",
    "prefix": busca,
    "format": formato
}
paths = response.download(arguments)  # * passing the arguments to the function
print(paths)
Example #5
def stuff(keyword):
    ColorList = []
    file = "cache/" + keyword.lower()
    storageClient = storage.Client()
    bucket = storageClient.get_bucket('askpalette.appspot.com')
    blob = bucket.blob(file)
    exists = storage.Blob(bucket=bucket, name=file).exists(storageClient)

    if exists:
        # keep `file` pointing at the cache path; store the downloaded bytes separately
        cached = blob.download_as_string()
        if cached:
            lines = cached.decode().splitlines()
            for line in lines:
                parts = line.split(",")
                ColorList.append(Color(parts[0], parts[1], parts[2], parts[3]))
            return ColorList

    try:
        response = google_images_download.googleimagesdownload(
        )  #class instantiation
        arguments = {
            "keywords": keyword,
            "limit": 3,
            "silent_mode": True,
            "no_numbering": True,
            "no_download": True
        }
        #arguments = {"keywords":keyword,"limit":5,"no_numbering":True}   #creating list of arguments
        paths = response.download(
            arguments)  #passing the arguments to the function

        while len(paths[0][keyword]) < 1:
            paths = response.download(arguments)

        for uri in paths[0][keyword]:
            # Loads the image into memory
            if (__tooBig(uri)):
                continue
            print(uri)
            client = vision.ImageAnnotatorClient()
            image = vision.types.Image()
            image.source.image_uri = uri

            response = client.image_properties(image=image)
            props = response.image_properties_annotation
            #print('Properties:')

            #print(response)

            for colorData in props.dominant_colors.colors:
                color = Color(colorData.color.red, colorData.color.green,
                              colorData.color.blue, colorData.score)

                merged = False
                for idx, existing in enumerate(ColorList):
                    if existing.diff(color) < 10:
                        # write the averaged color back into the list;
                        # rebinding the loop variable would not update it
                        ColorList[idx] = __average(existing, color)
                        merged = True
                        break
                if not merged:
                    ColorList.append(color)
        # Second pass: merge colors that fall within a difference threshold of
        # each other, without mutating the list while iterating over it.
        merged_colors = []
        for color in ColorList:
            threshold = 18 if len(ColorList) >= 7 else 5
            match = next((kept for kept in merged_colors
                          if kept.diff(color) < threshold), None)
            if match is not None:
                merged_colors[merged_colors.index(match)] = __average(match, color)
            else:
                merged_colors.append(color)
        ColorList = merged_colors

        ColorList.sort(key=lambda color: color.score, reverse=True)

        uploadStr = ""
        for color in ColorList:
            uploadStr += str(color) + '\n'

        storageClient = storage.Client()
        bucket = storageClient.get_bucket('askpalette.appspot.com')
        blob = bucket.blob(file)
        blob.upload_from_string(uploadStr)
    except:
        return None

    return ColorList


#stuff("music")
# print(Color(6,17,71,0).diff(Color(6,9,36,1)))
Example #6
def googleimage_download(dictionary):
    response = google_images_download.googleimagesdownload()
    # use the dictionary passed in rather than an unused module-level global
    paths = response.download(dictionary)
from google_images_download import google_images_download

downloader = google_images_download.googleimagesdownload()

# Download images. Unsplash is an image website; it serves as a random-image keyword.
downloader.download(
    {
        "keywords": "croissant,pain au chocolat,unsplash",
        "output_directory": "dataset",
        "limit": 500,
        "chromedriver": "/home/pierre/Development/Devoxx/python/venv/lib/python3.6/site-packages/chromedriver_binary/chromedriver"
    }
)
Example #8
def the_function(the_name):
	the_anime=the_name

	#main window
	window = Tk()
	window.title("Anime Synopsis \n")
	window.configure(background="black")
	window.geometry("1280x720")

	#scraping function
	def scrapeit(name_anime):
		anime_name=name_anime
		name_update=name_anime.replace (" ","%20")
		search_string_1="https://myanimelist.net/anime.php?q="
		final_search=search_string_1+name_update
		#print(final_search)
		search_init=requests.get(final_search)
		search_soup=BeautifulSoup(search_init.text,'lxml')
		the_final_link=""
		for link in search_soup.find_all('a', attrs={'class' : 'hoverinfo_trigger fw-b fl-l'},limit=1):

			the_final_link=link['href']

		return the_final_link

	def defineit(the_final_link):
		res=requests.get(the_final_link)
	#res_content=res.content
		soup=BeautifulSoup(res.text,"lxml")
		soup_re=soup.find_all("span", itemprop="description")
	#print(soup.title.string)
		for i in soup_re:
			return(i.text)

	def stateit(the_anime_link):

		res=requests.get(the_anime_link)
		soup=BeautifulSoup(res.text,"lxml")
		soup_re=soup.findAll("div",{ "id" : "content" })
		for i in soup_re:

			trash=i.text
		def Convert(string):
			li=list(string.split("\n"))
			return li
  
		list_re=Convert(trash)
		indexx=list_re.index("Status:",30)
		status_1=str(list_re[indexx+1])
		return status_1
		

	#background 
	response = google_images_download.googleimagesdownload()   #class instantiation
	arguments = {"keywords":str(the_name),"limit":1,"format":"jpg","size":"icon","silent_mode":1}   #creating list of arguments
	paths = response.download(arguments)
	path_1=str(paths[0].get(str(the_name)))
	path_2=path_1.replace('[','')
	path_3=path_2.replace(']','') 
	path_4=path_3.replace("'",'')
	#picture_1=PhotoImage(file=path_4)
	img = ImageTk.PhotoImage(Image.open(path_4))
	Label(window, image=img, bg="black").grid(row=2,column=0,sticky=N+S+E+W)
	#label2
	Label(window,text="Synopsis:\n", bg="black", fg="white", font="none 12 bold").grid(row=0, column=0,sticky=N+S+E+W)

	#output text
	output=tkscrolled.ScrolledText(window,width=100, height=6, wrap=WORD, background="white")
	output.grid(row=1, column=0, columnspan=3,sticky=N+S+E+W)

	output.delete(0.0, END)
	Label(window,text="\nStatus:",bg="black",fg="white",font="none 12 bold").grid(row=4,column=0,sticky=W)
	output_status=Text(window,width=50, height=2, wrap=WORD, background="white")
	output_status.grid(row=4, column=1, columnspan=1, sticky=W)
	output_status.delete(0.0,END)
	


	try:
		the_anime_link=scrapeit(the_name)
		defination=defineit(the_anime_link)
		status=stateit(the_anime_link)
		
	except:
		defination="Do you even watch Anime?"
		status="Unknown"
	output.insert(END,defination)
	output_status.insert(END,status)
		
	#exit function
	def close_window():
		window.destroy()
		exit()

	#exit label
	Label(window,text="Click here to exit\n", bg="black", fg="white", font="none 12 bold").grid(row=6, column=0,sticky=N+S+E+W)

	#exit button
	Button(window,text="Exit", width=14, command=close_window).grid(row=7, column=0,sticky=N+S+E+W)

	#mainloop
	window.mainloop()
from google_images_download import google_images_download

options = {"keywords": None, "limit": None, "output_directory": None}

google_obj = google_images_download.googleimagesdownload()
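The `options` template above is never filled in within this fragment; a minimal sketch of populating it and handing it to the shared downloader (the keyword, limit, and directory values are hypothetical):

options["keywords"] = "red panda"
options["limit"] = 10
options["output_directory"] = "downloads"
google_obj.download(options)  # runs the download with the filled-in options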
def search(query, limit):
    response = google_images_download.googleimagesdownload()
    arguments = {"keywords": query, "limit": limit, "print_urls": True}
    paths = response.download(arguments)
    print(paths)
Example #11
 def __init__(self):
     # instantiate the google image client (gic) object
     self.gic = google_images_download.googleimagesdownload()
     log.basicConfig(filename="tophitslogs.log", level=log.DEBUG)
def download_images(keys, no_of_url_required):
    response = google_images_download.googleimagesdownload()   #class instantiation
    arguments = {"keywords": keys,"limit":no_of_url_required,"print_urls":True}   
    paths = response.download(arguments)  
Example #13
import requests
from bs4 import BeautifulSoup as BS

# `img_url` (the Google Images search-results URL) and `keyword` are assumed
# to be defined earlier in the original script.
headers = {
    'User-Agent':
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0"
}  # User-Agent header
site_html = requests.get(img_url, headers=headers).text  # raw HTML of the search results page
soup = BS(site_html, "html.parser")  # BeautifulSoup object built from the raw HTML and an HTML parser

#This code will download the first thumbnail sized photo that it encounters
for element in soup.findAll('img'):

    url = element.get("src")  #gets the src link of image
    r = requests.get(url)  #gets the url data

    with open("./img.png", "wb") as photo:  #writes the image data to a file
        photo.write(r.content)
    break  # only the first image is needed
# As Google doesn't allow web scraping of its search results, we will use a
# package called google_images_download instead.
from google_images_download import google_images_download
response = google_images_download.googleimagesdownload()  #creating an object
arguments = {
    "keywords": keyword,
    "format": "jpg",
    "limit": 1,
    "print_urls": True,
    "size": "medium"
}  #function parameters
response.download(arguments)  #downloading the image
Example #14
 def __init__(self, source, validation_split=0.2):
     self.source = addSlash(source)
     self.download_settings["output_directory"] = self.source
     response = google_images_download.googleimagesdownload()
     self.download = response.download
Example #15
from google_images_download import google_images_download
import json

if __name__ == "__main__":
    all_diseases = [
        'Actinic keratoses', 'Basal cell carcinoma',
        'Benign keratosis-like lesions', 'Dermatofibroma', 'Melanocytic nevi',
        'Melanoma', 'Vascular lesions'
    ]

    image_scraper = google_images_download.googleimagesdownload()
    for disease_type in all_diseases:
        image_scraper.download({
            'keywords': f'"{disease_type}"',
            'extract_metadata': True,
            'language': 'English',
            'limit': 100,
            'no_directory': True,
            'output_directory': f'scraped_data/{disease_type}'.replace(' ', '_').lower()
        })
Example #16
        def submittion():
            global location

            location = location_entry.get()

            if len(location) == 0:
                messagebox.showinfo("Entry Error!",
                                    "Please Enter A Valid Location!")
            else:
                user_proof = username_get()

                response = google_images_download.googleimagesdownload()

                arguments = {
                    "keywords": location + " beautiful city images",
                    "limit": 5,
                    "print_urls": False,
                    "silent_mode": True,
                    "format": "png",
                    "no_directory": True,
                    "output_directory":
                    "C:/Users/aayus/Desktop/PROJECTS/LOGIN",
                    "save_source": "paths",
                    "exact_size": "320,160"
                }
                paths = response.download(arguments)

                with open("paths.txt", "r") as f:
                    splits = f.read()
                    content = splits.split("//")
                    loc1 = content[1]
                    loc2 = content[3]
                    loc3 = content[5]
                    loc4 = content[7]
                    loc5 = content[9]

                    req = loc1.split("\t")
                    req2 = loc2.split("\t")
                    req3 = loc3.split("\t")
                    req4 = loc4.split("\t")
                    req5 = loc5.split("\t")

                    filepath1 = req[0]
                    filepath2 = req2[0]
                    filepath3 = req3[0]
                    filepath4 = req4[0]
                    filepath5 = req5[0]

                    f.close()

                filepaths = [
                    filepath1, filepath2, filepath3, filepath4, filepath5
                ]

                val = randint(0, 4)

                display = filepaths[val]

                location_pic = PhotoImage(file=display)
                pic_label = Label(wnd, image=location_pic)
                pic_label.config(image=location_pic)
                pic_label.grid(row=2, column=1)

                try:
                    os.remove(filepath1)
                    os.remove(filepath2)
                    os.remove(filepath3)
                    os.remove(filepath4)
                    os.remove(filepath5)

                    os.remove('paths.txt')
                    winshell.recycle_bin().empty(confirm=False,
                                                 show_progress=False,
                                                 sound=False)
                    print("Removed")
                except:
                    print("Files don't exist.")

                wnd.update()
                wnd.mainloop()
Example #17
def gimg_downloader(arguments):
    response = googleimagesdownload()
    path_ = response.download(arguments)
    return path_
Example #18
def image_collector(topics, img_count, img_size):

    ####### DATA COLLECTION #######

    # creates a data directory if one does not exist. we will store images here
    try:
        os.makedirs('data')
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # creates a 'train' folder inside 'data';
    # if one already exists, it creates another one with an incremented number
    # get_ipython().system('cd data')  # no-op: 'cd' in a subshell does not change the cwd
    count = 1
    try:
        os.makedirs('data/train1')
    except:
        if os.path.exists('data/train1') == True:
            while True:
                try:
                    os.makedirs('data/train' + str(count))
                    break
                except:
                    count += 1

    # creating 2 lists in case topics contain 2 words
    # topics_search keeps both words, to use for googleimagesdownload, if applicable
    topics_clean = []
    topics_search = []
    for topic in topics:
        try:
            topic = topic.replace(' ', '_')
            topics_clean.append(topic.split('_')[0])
            topics_search.append(topic)
        except:
            topics_clean.append(topic)

    # creating new directory for each topic and collecting images for it
    print('Collecting images...')
    for topic_clean in topics_clean:
        try:
            os.makedirs('data/train' + str(count) + '/' + topic_clean)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # getting the index from topics_clean, so that we can match it with topics, and use that instead to collect images
        topic_index = topics_clean.index(topic_clean)
        search_term = topics_search[topic_index]
        folder_name = topic_clean

        # image collection using google_images_download
        response = google_images_download.googleimagesdownload()
        arguments = {
            'keywords': search_term,
            'size': 'medium',
            'limit': img_count,
            'format': 'jpg',
            'time_range': '{"time_min":"01/01/2018","time_max":"12/01/2018"}',
            'output_directory': 'data/',
            'image_directory': 'train' + str(count) + '/' + folder_name + '/',
            'silent_mode': True,
            'chromedriver': 'chromedriver.exe'
        }
        paths = response.download(arguments)

    ####### IMAGE PROCESSING #######

    X = []
    y = []

    print('Processing images...')
    for topic_clean in tqdm(topics_clean):
        # opening images in color, resizing them, and making each one into an array
        for f in glob.glob(
                os.path.join('data/train' + str(count), topic_clean, '*.jpg')):
            try:
                img = Image.open(str(f))
                img = img.convert('RGB')
                img = img.resize((img_size[0], img_size[1]))
                arr = image.img_to_array(img)

                # cropping images
                arr2d = extract_patches_2d(arr, patch_size=img_size)
                for crop in arr2d:
                    X.append(crop)
                    y.append(topic_clean)

            except:
                pass

    X = np.array(X)
    y = np.array(y)

    return X, y
 def __init__(self):
     self.downloader = google_images_download.googleimagesdownload()
Example #20
def model_tester(model, topics, img_size):

    # deletes the validation folder, in case this function has already been run
    try:
        shutil.rmtree('data/validation')
    except:
        pass

    # creates a validation folder
    os.makedirs('data/validation')

    topics_clean = []
    topics_search = []
    for topic in topics:
        try:
            topic = topic.replace(' ', '_')
            topics_clean.append(topic.split('_')[0])
            topics_search.append(topic)
        except:
            topics_clean.append(topic)

    # collecting 1 image for each class
    for topic_clean in topics_clean:

        try:
            os.makedirs('data/validation/' + topic_clean)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # getting the index from topics_clean, so that we can match it with topics, and use that instead to collect images
        topic_index = topics_clean.index(topic_clean)
        search_term = topics_search[topic_index]
        folder_name = topic_clean

        # hides output from terminal. google_images_download will clutter otherwise
        original_stdout = sys.stdout
        text_trap = io.StringIO()
        sys.stdout = text_trap

        # image collection using google_images_download; only images from 2014 are used to minimize the possibility of duplicate images
        response = google_images_download.googleimagesdownload()
        arguments = {
            'keywords': search_term,
            'size': 'medium',
            'limit': 3,
            'format': 'jpg',
            'time_range': '{"time_min":"01/01/2014","time_max":"12/01/2014"}',
            'output_directory': 'data/',
            'image_directory': 'validation/' + folder_name + '/',
            'silent_mode': True,
            'chromedriver': 'chromedriver.exe'
        }
        paths = response.download(arguments)

        # restores terminal output
        sys.stdout = original_stdout

    # for each image, show the image and use model to predict % chance for each class
    for topic_clean in topics_clean:

        topic_index = topics_clean.index(topic_clean)
        print('Image #' + str(topic_index + 1) + ' from ' + topic_clean +
              ' class: \n')

        list_of_img = glob.glob('data/validation/' + topic_clean + '/*')
        sorted_files = sorted(list_of_img, key=os.path.getmtime)

        path = sorted_files[0].replace('\\', '/')

        img = Image.open(path)
        img = img.convert('RGB')
        img = img.resize((img_size[0], img_size[1]))

        # making prediction on img
        img_pred = np.expand_dims(img, axis=0)
        pred = model.predict(img_pred)

        # setting up to show multiple images
        rows = len(topics)
        fig = plt.figure(figsize=(25, 25))
        fig.add_subplot(rows, 1, topic_index + 1)
        plt.imshow(img)

        #print probability of each image being each class
        for sub_topic in topics:
            subtopic_index = topics.index(sub_topic)
            print(
                f' The model predicted there is a {round((pred[0][subtopic_index]) * 100, 2)} % chance this is a {sub_topic}'
            )
        print('------------------------------------\t')

    return
Example #21
def pic(place):
    response = google_images_download.googleimagesdownload()   #class instantiation
    arguments = {"keywords":"%s %s" % (city, place),"limit":1,"print_urls":True}
    paths = response.download(arguments)
    img = Image(filename=paths[0][city + ' ' + place][0])
    return img
Example #22
def scrape_google_images(imageName, count):
    response = google_images_download.googleimagesdownload()
    arguments = {"keywords": imageName, "limit": count, "print_urls": True,
                 "chromedriver": r'C:\WebDrivers\chromedriver.exe', "prefix": imageName}
    paths = response.download(arguments)
def download_pics(keyword, limit):
    ImageSearch = google_images_download.googleimagesdownload()
    SearchArgs = {"keywords": keyword, "limit": limit, "format": "jpg"}
    ImageSearch.download(SearchArgs)
    return
Example #24
from google_images_download import google_images_download

# Creates Image Object ->
imgObject = google_images_download.googleimagesdownload()

search_queries = [
    'juventus juan cuadrado',
    '''buffon goalkeeper juventus''',
    '''atletico madrid diego costa''',
    '''paris keylor navas''',
]


def downloadImages(query):
    # "keywords" is the query that we give the application to run
    # "format" is the image format to download
    # "limit" sets the maximum number of images
    # "print_urls" prints each URL (results can also be saved to a csv)
    # "size" is the size of the images we want,
    #   specified like in the Google Image Tool ("large", "medium", "icon")
    # "aspect_ratio" filters the images to download ("tall", "square", "wide", "panoramic")

    arguments = {
        "keywords": query,
        "format": "gif",
        "limit": 100,
        "print_urls": True,
        "size": "medium",
        "aspect_ratio": "panoramic"
    }
    try:
        # assumed completion: attempt the download and skip the query on failure
        imgObject.download(arguments)
    except Exception:
        pass
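A minimal driver sketch for the function above, assuming the completed try block, that loops over the predefined search_queries:

for query in search_queries:
    downloadImages(query)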
Example #25
def main():
    # Reload the csv files from disk and store the data in a dataframe
    results = {}
    all_winners = {}
    categorie_data = {}
    best_catg_time = {}
    clean_awards = {}

    # Reload the wikidata from disk
    people = wikidata.call_wikidate('actors', 'actorLabel')
    people += wikidata.call_wikidate('directors', 'directorLabel')
    people += wikidata.call_wikidate('actresses', 'actorLabel')
    things = wikidata.call_wikidate('series', 'seriesLabel')
    people = [re.sub(r'[^\w\d\s]+', '', person_) for person_ in people]
    things = [re.sub(r'[^\w\d\s]+', '', thing_) for thing_ in things]

    # Load the csv files and clean data
    print("Load Dataframes")
    for year in resources.years:
        try:
            extractor = InfoExtractor()
            print("Start " + str(year) + " ...")
            print("Reading ...")
            extractor.read_dataframe("dirty_gg%s.csv" % year)
            print("Language ...")
            extractor.get_english_tweets("text", "language")
            print("Cleaning ...")
            extractor.clean_dataframe_column("text", "clean_upper")
            print("Lowering ...")
            extractor.make_to_lowercase("clean_upper", "clean_lower")
            print("Dropping ...")
            extractor.convert_time('timestamp_ms')
            extractor.drop_column("user")
            extractor.drop_column("id")
            extractor.drop_column("timestamp_ms")
            extractor.drop_column("language")
            resources.data[year] = extractor.get_dataframe()
            print("Finish " + str(year) + " ...")
            results[year] = {}
        except:
            print("Couldn't load Dataframes for " + str(year))
    print("Done Dataframes\n")

    # We start by finding the awards for each year
    print("Find Awards")
    for year in resources.years:
        # try:
            chunker = Chunker()
            categorie_data[year] = resources.data[year].copy()
            categorie_data[year]['categorie'] = categorie_data[year].apply(chunker.extract_wrapper, axis=1)
            categorie_data[year] = categorie_data[year].loc[categorie_data[year].categorie != 'N/a', :]
            categorie_data[year].reset_index(drop=True, inplace=True)
            categorie_data[year] = categorie_data[year].loc[categorie_data[year].categorie.str.split().map(len) > 3, :]
            best_categories = chunker.pick_categories(categorie_data[year])
            best_categories = chunker.filter_categories(best_categories)
            # str.replace returns a new string, so the results must be collected
            normalized_categories = []
            for i in best_categories:
                if 'actor' in i:
                    i = i.replace('actor', 'performance by an actor')
                elif 'actress' in i:
                    i = i.replace('actress', 'performance by an actress')
                if 'tv' in i:
                    i = i.replace('tv', 'television')
                elif 'picture' in i and 'motion' not in i:
                    i = i.replace('picture', 'motion picture')
                if 'series' in i and 'television' not in i:
                    i = i.replace('series', 'television series')
                normalized_categories.append(i)
            results[year]["Awards"] = normalized_categories
        # except:
        #     print("Couldn't find awards for " + str(year))
    print("Done Awards\n")

    # Find the point in time when an award took place
    print("Find Times")
    for year in resources.years:
        try:
            if year in [2013, 2015]:
                awards = resources.OFFICIAL_AWARDS_1315
            else:
                awards = resources.OFFICIAL_AWARDS_1819

            info_extract = InfoExtractor()
            for each_award in awards:
                clean_awards[each_award] = info_extract.clean_tweet(each_award)

            categorie_data[year]['real_categorie'] = categorie_data[year]['categorie'].apply(lambda x: fuzz_(x, clean_awards))
            categorie_data[year] = categorie_data[year].loc[categorie_data[year]['real_categorie'] != 'N/a', :]
            categorie_data[year].reset_index(drop=True, inplace=True)

            data_catg = categorie_data[year].groupby(['hour', 'minute', 'real_categorie']).count()['clean_lower'].unstack().reset_index()
            data_catg = data_catg.dropna(how='all', axis=1)

            best_catg_time[year] = {}
            for each_ in list(data_catg.columns):
                if not each_ in ['hour', 'minute']:
                    best_catg_time[year][each_] = []
                    max_idx = data_catg[each_].idxmax()
                    best_catg_time[year][each_].append(
                        (data_catg.iloc[max_idx - 2]['hour'], data_catg.iloc[max_idx - 2]['minute']))
                    best_catg_time[year][each_].append(
                        (data_catg.iloc[max_idx - 1]['hour'], data_catg.iloc[max_idx - 1]['minute']))
                    best_catg_time[year][each_].append(
                        (data_catg.iloc[max_idx]['hour'], data_catg.iloc[max_idx]['minute']))
                    best_catg_time[year][each_].append(
                        (data_catg.iloc[max_idx + 1]['hour'], data_catg.iloc[max_idx + 1]['minute']))
                    best_catg_time[year][each_].append(
                        (data_catg.iloc[max_idx + 2]['hour'], data_catg.iloc[max_idx + 2]['minute']))
        except:
            print("Couldn't find times for " + str(year))
    print("Finished Times for award ceremony\n")

    # We search for the hosts
    print("Searching for Host(s)")
    for year in resources.years:
        try:
            host_categorizer = TweetCategorizer([resources.HOST_WORDS], [], "host_tweet", resources.data[year], 0,resources.data[year].shape[0])
            host_tweets = host_categorizer.get_categorized_tweets()
            hosters = host_categorizer.find_percentage_of_entities(host_tweets, 0.2, people, [])
            results[year]["Hosts"] = hosters[resources.HOST_WORDS]
        except:
            print("Couldn't find Hosts for " + str(year))
    print("Found the Hosts!\n")

    # Search for the winners
    print("Searching for Winners...")
    for year in resources.years:
        try:
            all_winners[year] = []
            awards = resources.OFFICIAL_AWARDS_1315
            if year in [2018, 2019]:
                awards = resources.OFFICIAL_AWARDS_1819
            winner_categorizer = TweetCategorizer(awards, resources.STOPWORDS, "award", resources.data[year], 3,
                                                  resources.data[year].shape[0])
            winner_tweets = winner_categorizer.get_categorized_tweets()
            winners = winner_categorizer.find_list_of_entities(winner_tweets, 1, people,things + wikidata.call_wikidate("films", "filmLabel",str(year - 2),str(year)))
            for key in winners:
                results[year][key] = {}
                if winners[key]:
                    results[year][key]["Winner"] = winners[key][0]
                else:
                    results[year][key]["Winner"] = ""
                all_winners[year].append(winners[key])
        except:
            print("Couldn't find Winners for the year " + str(year))
    print("Found all the Winners!\n")

    # Identifying the presenters for the specified year
    print("Searching for Presenters")
    for year in resources.years:
        try:
            for key, value in best_catg_time[year].items():
                data_new = pd.DataFrame(columns=list(resources.data[year].columns))

                for each_value in value:
                    data_temp = resources.data[year].loc[(resources.data[year].hour == int(each_value[0])), :]
                    data_temp = data_temp.loc[(data_temp.minute == int(each_value[1])), :]
                    data_new = pd.concat([data_new, data_temp])

                presenter_categorizer = TweetCategorizer([resources.PRESENTER_WORDS], [], "presenter_tweet", data_new,
                                                         0,
                                                         data_new.shape[0])
                presenter_tweets = presenter_categorizer.get_categorized_tweets()

                # presenters = find_names(presenter_tweets.clean_upper.tolist(),2,people,all_winners[year],results[year]["Hosts"])
                presenters = presenter_categorizer.find_list_of_entities(presenter_tweets, 3, people, [], people=True)
                presenters = [p for p in presenters[list(presenters.keys())[0]] if
                              (p not in all_winners[year]) and (p not in results[year]["Hosts"])]

                results[year][key]['Presenters'] = presenters[-3:]

            if year in [2013, 2015]:
                awards = resources.OFFICIAL_AWARDS_1315
            else:
                awards = resources.OFFICIAL_AWARDS_1819

            for each_ in awards:
                if not each_ in best_catg_time[year].keys():
                    results[year][each_]['Presenters'] = []
        except:
            print("Couldn't find presenters for " + str(year))
    print("Found the Presenters!\n")

    # Identify the nominees for each year
    print("Looking for Nominees...")
    for year in resources.years:
        try:
            for key, value in best_catg_time[year].items():
                data_new = pd.DataFrame(columns=list(resources.data[year].columns))

                for each_value in value:
                    data_temp = resources.data[year].loc[(resources.data[year].hour == int(each_value[0])), :]
                    data_temp = data_temp.loc[(data_temp.minute == int(each_value[1])), :]
                    data_new = pd.concat([data_new, data_temp])

                nominee_categorizer = TweetCategorizer([resources.NOMINEE_WORDS], [], "nominee_tweet", data_new, 0,
                                                       data_new.shape[0])
                nominee_tweets = nominee_categorizer.get_categorized_tweets()

                # presenters = find_names(presenter_tweets.clean_upper.tolist(),2,people,all_winners[year],results[year]["Hosts"])
                if ('actress' in key.split()):
                    nominees = nominee_categorizer.find_list_of_entities(nominee_tweets, 6,
                                                                         wikidata.call_wikidate('actresses',
                                                                                                'actorLabel'),
                                                                         [], people=True)
                elif ('actor' in key.split()):
                    nominees = nominee_categorizer.find_list_of_entities(nominee_tweets, 6,
                                                                         wikidata.call_wikidate('actors', 'actorLabel'),
                                                                         [],
                                                                         people=True)
                elif ('director' in key.split()):
                    nominees = nominee_categorizer.find_list_of_entities(nominee_tweets, 6,
                                                                         wikidata.call_wikidate('directors',
                                                                                                'actorLabel'),
                                                                         [], people=True)
                else:
                    nominees = nominee_categorizer.find_list_of_entities(nominee_tweets, 6, [],
                                                                         things + wikidata.call_wikidate("films",
                                                                                                         "filmLabel",
                                                                                                         str(year - 2),
                                                                                                         str(year)))

                nominees = [p for p in nominees[list(nominees.keys())[0]] if (p not in all_winners[year]) and (
                        p not in results[year]["Hosts"] and (p not in results[year][key]['Presenters']))]

                results[year][key]['Nominees'] = nominees[-6:]

            if year in [2013, 2015]:
                awards = resources.OFFICIAL_AWARDS_1315
            else:
                awards = resources.OFFICIAL_AWARDS_1819

            for each_ in awards:
                if not each_ in best_catg_time[year].keys():
                    results[year][each_]['Nominees'] = []
        except:
            print("Couldn't find nominees for the year " + str(year))
    print("Found the Nominees!\n")

    # Searching for best and worst dress on the Red Carpet
    print("Looking for every mention of Dresses...")
    for year in resources.years:
        try:
            dress_categorizer = TweetCategorizer([resources.DRESS], [], "dress", resources.data[year], 0,
                                                 resources.data[year].shape[0])
            dress_tweets = dress_categorizer.get_categorized_tweets()

            best_dress_categorizer = TweetCategorizer([resources.BEST_DRESS], [], "best_dress", dress_tweets, 0,
                                                      dress_tweets.shape[0])
            best_dress_tweets = best_dress_categorizer.get_categorized_tweets()
            probs_best = best_dress_categorizer.list_probabilities(best_dress_tweets, 3, people, [], people=True)
            best_dressed = list(probs_best.keys())
            representative_best_tweets = []
            for b in best_dressed:
                for index, row in best_dress_tweets.iterrows():
                    if b in str(row["clean_upper"]):
                        representative_best_tweets.append(str(row["text"]))
                        break

            worst_dress_categorizer = TweetCategorizer([resources.WORST_DRESS], [], "worst_dress", dress_tweets, 0,
                                                       dress_tweets.shape[0])
            worst_dress_tweets = worst_dress_categorizer.get_categorized_tweets()
            probs_worst = worst_dress_categorizer.list_probabilities(worst_dress_tweets, 3, people, [], people=True)
            worst_dressed = list(probs_worst.keys())

            representative_worst_tweets = []
            for w in worst_dressed:
                for index, row in worst_dress_tweets.iterrows():
                    if w in str(row["clean_upper"]):
                        representative_worst_tweets.append(str(row["text"]))
                        break

            results[year]["BestDressed"] = probs_best
            results[year]["WorstDressed"] = probs_worst
            results[year]["BestDressedTweets"] = representative_best_tweets
            results[year]["WorstDressedTweets"] = representative_worst_tweets
        except:
            print("Couldn't find dresses for " + str(year))
    print("Found best and worst dressed celebrities\n")

    # Looking for memorable moments from the award ceremony
    print("Finding the most memorable Moments...")
    for year in resources.years:
        try:
            moment_categorizer = TweetCategorizer([resources.MOMENTS], [], "moments", resources.data[year], 0,
                                                  resources.data[year].shape[0])
            moment_tweets = moment_categorizer.get_categorized_tweets()
            link_finder = re.compile(r'\bhttp[^\s ]+\b')
            results[year]["Moments"] = {}
            for type in resources.MOMENT_TYPES:
                type_categorizer = TweetCategorizer([type], [], "jokes", moment_tweets, 0, moment_tweets.shape[0])
                type_tweets = type_categorizer.get_categorized_tweets()
                type_person = type_categorizer.find_list_of_entities(type_tweets, 1, people, [], people=True)[type]
                if len(type_person) > 0:
                    type_person = type_person[0]
                    results[year]["Moments"][type] = {}
                    results[year]["Moments"][type]["Person"] = type_person
                    for index, row in type_tweets.iterrows():
                        if type_person in str(row["clean_upper"]):
                            results[year]["Moments"][type]["Tweet"] = str(row["text"])
                            break
                    http_categorizer = TweetCategorizer(["http"], [], "links", type_tweets, 0, type_tweets.shape[0],
                                                        column="text")
                    http_tweets = http_categorizer.get_categorized_tweets()
                    http_tweets = http_tweets.reset_index(drop=True)
                    links = set()
                    if (len(http_tweets) > 0):
                        results[year]["Moments"][type]["Tweet"] = str(http_tweets["text"][0])
                        for index, row in http_tweets.iterrows():
                            matches = link_finder.findall(str(row["text"]))
                            for m in matches:
                                links.add(m)
                    results[year]["Moments"][type]["Link"] = list(links)[:3]
        except:
            print("Couldn't find moments for the year " + str(year))
    print("Found the best Moments!\n")

    # Finding people who were expected to win, but did not win
    print("Searching for the biggest Snubbed personalities from the show... ")
    for year in resources.years:
        try:
            snub_categorizer = TweetCategorizer([resources.SNUB], ["Golden", "Golden Globes", "Hollywood", "Globe Awards", "Disney","Oscar","Common"],"snub", resources.data[year], 0,resources.data[year].shape[0])
            snub_tweets = snub_categorizer.get_categorized_tweets()

            most_snub_categorizer = TweetCategorizer([resources.SNUB], [], "most_snub", snub_tweets, 0,snub_tweets.shape[0])
            most_snub_tweets = most_snub_categorizer.get_categorized_tweets()
            probs_most_snub = most_snub_categorizer.list_probabilities(most_snub_tweets, 2, people, [], people=True)
            most_snubbed = list(probs_most_snub.keys())
            results[year]["Snubbed"] = probs_most_snub
        except:
            print("Couldn't find snubbed celebs for " + str(year))
    print("Found all interestingly snubbed celebrities\n")

    # Preparing output files

    print("Write Markdown")
    markdown = ""
    for year in resources.years:
        markdown += "# " + str(year) + " Golden Globes\n"
        try:
            markdown += "## Hosts\n"
            for h in results[year]["Hosts"]:
                markdown += " - " + h + "\n"
        except:
            print("Couldn't write markdown hosts for " + str(year))

        try:
            markdown += "## Best Dressed\n"
            i = 1
            best_dressed = list(results[year]["BestDressed"].keys())
            for b in best_dressed:
                markdown += " " + str(i) + ". " + b + " (" + str(results[year]["BestDressed"][b]) + ") " + "\n"
                i += 1
            markdown += "\n"
            for b in best_dressed:
                response = google_images_download.googleimagesdownload()
                search = b + " " + str(year) + " Golden Globes Dress"
                arguments = {"keywords": search, "limit": 1,"format": "jpg", "print_urls": True}
                paths = response.download(arguments)
                print(paths)
                markdown += "<img src='file://" + paths[0][search][0] + "' height=300px alt='" + search + "'>  "
            markdown += "\n"
            markdown += "\n"
            for b in results[year]["BestDressedTweets"]:
                markdown += b + "  \n\n"
            markdown += "\n"
        except:
            print("Couldn't write markdown best dressed for " + str(year))

        try:
            markdown += "## Worst Dressed\n"
            i = 1
            worst_dressed = list(results[year]["WorstDressed"].keys())
            for w in worst_dressed:
                markdown += " " + str(i) + ". " + w + " (" + str(results[year]["WorstDressed"][w]) + ") " + "\n"
                i += 1
            markdown += "\n"
            for w in worst_dressed:
                response = google_images_download.googleimagesdownload()
                search = w + " " + str(year) + " Golden Globes Dress"
                arguments = {"keywords": search, "limit": 1, "print_urls": False}
                paths = response.download(arguments)
                print(paths)
                markdown += "<img src='file://" + paths[0][search][0] + "' height=300px alt='" + search + "'>  "
            markdown += "\n"
            markdown += "\n"
            for w in results[year]["WorstDressedTweets"]:
                markdown += w + "  \n\n"
            markdown += "\n"
        except:
            print("Couldn't write markdown worst dressed for " + str(year))

        try:
            i=1
            markdown += "## Who got Snubbed?\n"
            most_snubbed = list(results[year]["Snubbed"].keys())

            for b in most_snubbed:
                if len(b.split())>1:
                    markdown += " " + str(i) + ". " + b + " (" + str(results[year]["Snubbed"][b]) + ") " + "\n"
                    i += 1
            markdown += "\n"
            markdown += "\n"
        except:
            print("Couldn't write markdown snubbed for " + str(year))

        try:
            markdown += "#### Awards found\n"
            for a in results[year]["Awards"]:
                markdown += " - " + a + "\n"
        except:
            print("Couldn't write markdown awards for " + str(year))

        try:
            markdown += "## Moments\n"
            for moment in results[year]["Moments"]:
                markdown += "## " + moment.replace("|", " or ") + " moments\n"
                markdown += "##### Person:\n"
                markdown += "- " + results[year]['Moments'][moment]["Person"] + "\n"
                markdown += "##### Tweet:\n"
                markdown += "- " + results[year]['Moments'][moment]["Tweet"] + "\n"
                markdown += "##### Links:\n"
                for link in results[year]['Moments'][moment]["Link"]:
                    markdown += "- " + link + "\n"
                markdown += "\n"
        except:
            print("Couldn't write markdown moments for the year " + str(year))

        try:
            markdown += "## Awards\n"
            if year in [2013, 2015]:
                awards = resources.OFFICIAL_AWARDS_1315
            else:
                awards = resources.OFFICIAL_AWARDS_1819
            for cat in awards:
                markdown += "### " + cat + "\n"
                # Presenters
                markdown += "##### Presenters:\n"
                for a in results[year][cat]['Presenters']:
                    markdown += "- " + a + "\n"
                # Nominees
                markdown += "\n##### Nominees:\n"
                for a in results[year][cat]['Nominees']:
                    markdown += " - " + a + "\n"
                # Winner
                markdown += "\n##### Winner:\n"
                markdown += "- " + results[year][cat]['Winner'] + "\n"
        except:
            print("Couldn't write award results for the year " + str(year))
    print("Completed Markdown!\n")
    print("Please run - python autograder.py ",str(year))

    # Saving the final results as a Markdown (for easy access)
    with open('results.md', 'w') as file:
        file.write(markdown)

    # Saving the final results as JSON file (for autograder)
    with open("results.json", "w") as f:
        json.dump(results, f)

    return
 def __init__(self, project_directory):
     self.response = google_images_download.googleimagesdownload()
     self.download_directory = project_directory
def main(snake_db_fp: Path):
    sneks_already_encountered = []
    sneks_skipped = []
    sneks_already_encountered_fp = Path("sneks_encountered.json")
    if sneks_already_encountered_fp.exists():
        with sneks_already_encountered_fp.open() as f:
            j = json.load(f)
            sneks_already_encountered = j["encountered"]
            sneks_skipped = j["skipped"]
    with snake_db_fp.open() as f:
        snake_download_dir = Path.cwd() / "curated_downloads"
        snake_download_dir.mkdir(exist_ok=True)
        for snake in csv.DictReader(f):
            if (snake["index"] in sneks_already_encountered
                    or snake["index"] in sneks_skipped):
                continue
            else:
                try:
                    response = google_images_download.googleimagesdownload()
                    downloads = list(
                        response.download({
                            "keywords":
                            f"{snake['genus']} {snake['species']} {snake['common_name']}",
                            "limit": 4,
                        })[0].values())[0]
                    images = [Image.open(fp) for fp in downloads]
                    widths, heights = zip(*(i.size for i in images))
                    total_width = sum(widths)
                    max_height = max(heights)
                    new_im = Image.new("RGB", (total_width, max_height))
                    x_offset = 0
                    for im in images:
                        new_im.paste(im, (x_offset, 0))
                        x_offset += im.size[0]
                    new_im.show()
                    chosen_snake_idx = None
                    while True:
                        try:
                            chosen_snake_idx = (int(
                                input(
                                    f"Which snake do you choose? (Select: {list(range(1, len(images) + 1))}) "
                                )) - 1)
                            chosen_snake_fp = Path(
                                downloads.pop(chosen_snake_idx))
                        except ValueError:
                            raise NameError  # skip
                        except IndexError:
                            pass  # invalid input
                        else:
                            break
                    chosen_snake_fp.replace(
                        snake_download_dir /
                        f"{snake['index']}{chosen_snake_fp.suffix.lower()}")
                    sneks_already_encountered.append(snake["index"])
                    for other_download in downloads:
                        Path(other_download).unlink()
                except NameError:
                    sneks_skipped.append(snake["index"])
            subprocess.check_call("""osascript -e \'quit app "Preview"\'""",
                                  shell=True)
            with sneks_already_encountered_fp.open("wt") as f:
                json.dump(
                    {
                        "encountered": sneks_already_encountered,
                        "skipped": sneks_skipped,
                    },
                    f,
                )
Example #28
from google_images_download import google_images_download as gmd

google = gmd.googleimagesdownload()

#####################
# EDIT THIS SECTION #
#####################

keywords = [{
    "word": "family gathering, indian family, chinese family",
    "limit": 20
}, {
    "word":
    "china technology, china artificial island, london architecture, railway, busy city",
    "limit": 20
}, {
    "word":
    "business meeting real, real office, computer warehouse, amazon warehouse",
    "limit": 20
}, {
    "word":
    "manhattan street, india street, china street, street food, scotland village",
    "limit": 30
}, {
    "word": "scottish highlands, scottish castles",
    "limit": 20
}, {
    "word": "new year",
    "limit": 10
}]
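The snippet above only defines the keyword groups; a minimal sketch of feeding them to the `google` downloader created earlier (one call per group):

for entry in keywords:
    google.download({"keywords": entry["word"], "limit": entry["limit"]})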
from google_images_download import google_images_download  #importing the library

response = google_images_download.googleimagesdownload()  #class instantiation

arguments = {
    "keywords": "timber truck malaysia",
    "limit": 100,
    "print_urls": True
}  #creating list of arguments
paths = response.download(arguments)  #passing the arguments to the function
print(paths)
Example #30
def auto_video(query,summary,path_to_audio):    

    # case in colab add:
    # !pip install setuptools cookiejar git+https://github.com/Joeclinton1/google-images-download.git dhash ffmpeg-python pydub

    import json
    import math
    import os
    import re
    from os import listdir

    import dhash
    import ffmpeg
    import gdown
    import numpy as np
    import pandas as pd
    import requests
    import urllib3
    from bs4 import BeautifulSoup
    from google_images_download import google_images_download
    from PIL import Image
    from pydub import AudioSegment
    from scipy.spatial.distance import hamming


    print("###### init ######")
    
    title=re.sub(' ','_',query)
    title=re.sub(',','_',title)
    title=re.sub('\.','',title)
    query=re.sub('_',' ',title)

    response = google_images_download.googleimagesdownload() 

    def downloadimages(query): 
        # aspect ratio = the height width ratio of images to download ("tall, square, wide, panoramic") 
        arguments = {"keywords": query, 
              "format": "jpg", 
              "limit":10,
              "print_urls":True, 
              "size": "medium", 
              "aspect_ratio":"panoramic"} 
        try: 
            response.download(arguments)  
        except FileNotFoundError: 
            arguments = {"keywords": query, 
                "format": "jpg", 
                "limit":4, 
                "print_urls":True, 
                "size": "medium"} 
            try: 
                response.download(arguments) 
            except: 
                pass

    print("###### downloading images ######")
    downloadimages(query) 

    print("###### hashing images ######")

    def hashing(image,size=(25,25)):
        img = Image.open(image).convert("L")
        img2 = img.resize(size)
        row, col = dhash.dhash_row_col(img2)
        hash=dhash.format_hex(row, col)
        return(hash)

    print("###### saving hashes ######")

    dir='./downloads/' + query + '/'
    dirs = listdir(dir)

    print("###### calculating differences between images ######")

    hashes=[]
    for i in dirs:
        a=dir+i
        hash=hashing(a)
        hashes.append(hash)

    def dif_hashing(a,b):
        diff=dhash.get_num_bits_different(int(a,16),int(b,16))
        return(diff)

    list_difs_hashings=[0]
    for i in range(len(hashes)):
        for j in range(len(hashes)):
            if i<j:
                diff_hash = dif_hashing(hashes[i],hashes[j])
                if list_difs_hashings[0]==0:
                    list_difs_hashings[0]=diff_hash
                elif list_difs_hashings[0]<diff_hash:
                    list_difs_hashings.append(diff_hash)
                    list_difs_hashings.sort()
                    list_difs_hashings=list_difs_hashings[-4:]
                    list_difs_hashings.sort(reverse=True)


    images=[]
    for i in range(len(hashes)):
        for j in range(len(hashes)):
            if i<j:
                diff_hash = dif_hashing(hashes[i],hashes[j])
                if diff_hash in list_difs_hashings:
                    if dirs[i] not in images:
                        images.append(dirs[i])
                    if dirs[j] not in images:
                        images.append(dirs[j])
                if len(images)>4:
                    images=images[:4]
                    break

    if len(images)!=4:
      return("try again, less than 4 images")

    print("###### creating folders and saving the most different ones ######")

    os.mkdir('./downloads/diffimages')
    os.mkdir('./downloads/movie')
    for image in images:
        im=Image.open('./downloads/' + query + '/'+image)
        im.save('./downloads/diffimages/' + image)

    print("###### making a video out of images ######")
    SECONDS_BY_IMG=6
    FRAMERATE=1/SECONDS_BY_IMG
    stream=ffmpeg.input('downloads/diffimages/'+'*.jpg', pattern_type='glob', framerate=FRAMERATE).output('downloads/movie/'+title+'.mp4').run()

    summary_list_parts = summary.split(".")
    list_absolute_subtitle_time_by_phrase=[]
    for phrase in summary_list_parts:
        list_absolute_subtitle_time_by_phrase.append(SECONDS_BY_IMG)

    endtime_for_each_sub = np.cumsum(list_absolute_subtitle_time_by_phrase)
    init_time_for_each_sub = endtime_for_each_sub-list_absolute_subtitle_time_by_phrase
    endtime_for_each_sub = endtime_for_each_sub.tolist()
 
    print(type(endtime_for_each_sub),type(endtime_for_each_sub[1]))
    print("###### create rst file from summary ######")
    subtitles_path='downloads/movie/subtitles_of_'
    f = open(subtitles_path + title + '.rst', "w")
    for i in range(len(summary_list_parts)):
        if endtime_for_each_sub[i] < 10:
            endtime_for_each_sub_ = "0"+str(endtime_for_each_sub[i])
        else:
            endtime_for_each_sub_ = str(endtime_for_each_sub[i])

        if init_time_for_each_sub[i] < 10:
            init_time_for_each_sub_ = "0"+str(init_time_for_each_sub.tolist()[i])
        else:
            init_time_for_each_sub_ = str(init_time_for_each_sub[i])
        
        # SRT cues must be separated by a blank line, hence the trailing "\n\n"
        f.write(str(i+1)+"\n00:00:"+init_time_for_each_sub_+",00 --> 00:00:"+endtime_for_each_sub_+",00\n"+summary_list_parts[i]+"\n\n")

    f.close()
    f = open(subtitles_path + title + '.rst', "r")
    print("###### setting environment variables ######")
    os.environ["VIDEO"] = './downloads/movie/'+title +'.mp4'
    os.environ["SUBTITLES"] = subtitles_path +title+'.rst'
    os.environ["VIDEO_WITH_SUBTITLES_AND_AUDIO"] = "./downloads/movie/subtitled_with_music_"+title +'.mp4'
    os.environ["VIDEO_WITH_SUBTITLES"] = "./downloads/movie/subtitled_"+title +'.mp4'
    os.environ["AUDIO"] = path_to_audio
    AUDIO = path_to_audio

    print("###### creating a video with subtitles ######")
    # IPython/Colab shell escape; outside a notebook, invoke ffmpeg via subprocess instead
    !ffmpeg -i $VIDEO -vf subtitles=$SUBTITLES $VIDEO_WITH_SUBTITLES


    sound = AudioSegment.from_mp3(path_to_audio)

    sound_trimmed = sound[:SECONDS_BY_IMG*4*1000]
    sound_trimmed.export(path_to_audio, format="mp3")
    print("############\n\n\n\n\n",len(sound_trimmed))
    print("###### merging audio in video ######")
    !ffmpeg -i $VIDEO_WITH_SUBTITLES -i $AUDIO -c:v libx264 -vf format=yuv420p $VIDEO_WITH_SUBTITLES_AND_AUDIO




#from google.colab import drive 
# drive.mount('/content/gdrive')

# query = "Lifestyle choices can reduce risk for heartburn, study finds."
# summary = "Women who make healthy lifestyle choices can significantly reduce the risk of heartburn. Other thing. ooooother thing. one more thing"
# path_to_audio= 'gdrive/MyDrive/path/to/audio/audio.mp3'
# auto_video(query,summary,path_to_audio)
Example #31
def run_crawler(arguments):
    loader = google_images_download.googleimagesdownload()
    return loader.download(arguments)