def job():
    """Pop one queued YouTube link from Redis and scrape its details.

    Reads the list named by the REDIS_YOUTUBE_VIDEO_LIST env var; a missing
    entry (empty queue) is a no-op.
    """
    logger.LogInfo("Working...")
    raw_link = redis.lpop(os.getenv('REDIS_YOUTUBE_VIDEO_LIST'))
    if raw_link:
        # redis returns bytes; the queue is populated with ASCII URLs
        extract_data(raw_link.decode('ascii'))
    logger.LogInfo("Job Done")
# Beispiel #2  (snippet-site separator, not code — commented out: `Beispiel` is undefined)
# 0
def save_data():
    """Drain queued video-detail records from Redis and append them to a CSV.

    Pops JSON-encoded detail dicts from the list named by the
    REDIS_YOUTUBE_VIDEO_DETAILS env var until it is empty.  The first record
    written to a fresh ./csv/YoutubeLinkDetails.csv also writes the header
    row.  Each value is serialized with json.dumps, so strings keep their
    quotes — this matches the original on-disk format byte-for-byte.

    BUG FIX: the original drained the queue by calling itself recursively,
    which overflows the call stack on long queues; this version loops.
    """
    while True:
        logger.LogInfo("Working...")

        # pop out the last data of redis. (like stack in data structure)
        logger.LogInfo("Popping out last pushed data from redis")

        # "REDIS_YOUTUBE_VIDEO_DETAILS" comes from .env
        details = redis.lpop(os.getenv('REDIS_YOUTUBE_VIDEO_DETAILS'))
        print("details::", details)
        if not details:
            logger.LogInfo("process complete done")
            break

        # bytes -> str -> python dictionary
        details = json.loads(details.decode('ascii'))

        # column names come straight from the record's keys
        fieldnames = list(details.keys())

        # comma separated values
        # NOTE(review): values containing commas/newlines are not quoted here;
        # the stdlib csv module would be safer, but would change the existing
        # file format — confirm with consumers before switching.
        header = ', '.join(fieldnames)
        column_value = ', '.join(json.dumps(details[key]) for key in fieldnames)

        csv_file_name = "YoutubeLinkDetails.csv"
        # assigning path of the csv file to save
        file_path = "./csv/{}".format(csv_file_name)
        if os.path.exists(file_path):
            # 'a' mode is used to append into the file
            with open(file_path, 'a') as fp:
                fp.writelines("\n{}".format(column_value))
        else:
            # fresh file: write header on the first line, then the row
            with open(file_path, 'w') as fp:
                fp.writelines(header)
                fp.writelines("\n{}".format(column_value))
def extract_data(youtube_link):
    """Scrape title, tags, views, date, channel, duration and description
    from a YouTube watch page and push them (JSON-encoded) onto the Redis
    list named by the REDIS_YOUTUBE_VIDEO_DETAILS env var.

    On any scraping failure the error is logged and nothing is pushed.

    BUG FIX: the Chrome driver was never closed, leaking one browser
    process per call (and per failure); it is now quit in a finally block.
    """
    # initializing a dictionary to store all the data
    details = {}
    driver = None
    try:
        # NOTE(review): find_element_by_* is the legacy Selenium 3 API,
        # removed in Selenium 4 — confirm the pinned selenium version.
        driver = webdriver.Chrome()
        driver.get(youtube_link)
        # give the page time to render before querying elements
        time.sleep(3)
        details = {
            "link": youtube_link,
            'video_title': "",
            'hash_tags': "",
            'views': 0,
            'upload_date': None,
            'channel_name': "",
            'description': "",
            'duration': 0.0
        }

        response = driver.find_element_by_xpath(
            '//*[@id="container"]/h1/yt-formatted-string').text
        logger.LogInfo("Title extracted: {}".format(response))
        if response:
            details["video_title"] = response

        response = driver.find_element_by_xpath(
            '//*[@id="container"]/yt-formatted-string').text
        logger.LogInfo("Hash tags extracted")
        if response:
            details["hash_tags"] = response

        response = driver.find_element_by_xpath(
            '//*[@id="count"]/yt-view-count-renderer/span[1]').text
        logger.LogInfo("Views extracted")
        if response:
            details["views"] = response

        response = driver.find_element_by_xpath(
            '//*[@id="date"]/yt-formatted-string').text
        logger.LogInfo("Upload date extracted")
        if response:
            details["upload_date"] = response

        response = driver.find_element_by_xpath('//*[@id="text"]/a').text
        logger.LogInfo("Channel name extracted")
        if response:
            details["channel_name"] = response

        # let the player controls load before reading the duration
        time.sleep(3)

        response = driver.find_element_by_css_selector(
            '#movie_player > div.ytp-chrome-bottom > div.ytp-chrome-controls > div.ytp-left-controls > div.ytp-time-display.notranslate > span.ytp-time-duration'
        ).text
        logger.LogInfo("Duration extracted")
        if response:
            details["duration"] = response

        # expand the "Show more" section so the full description is visible
        driver.find_element_by_xpath(
            '//*[@id="more"]/yt-formatted-string').click()
        logger.LogInfo("Description extended")
        time.sleep(4)

        response = driver.find_element_by_xpath('//*[@id="description"]').text
        logger.LogInfo("Description extracted")
        if response:
            details["description"] = response

        logger.LogInfo('Succesfully extracted')

        redis.lpush(os.getenv('REDIS_YOUTUBE_VIDEO_DETAILS'),
                    json.dumps(details))
        logger.LogInfo('Details saved Succesfully in Redis')

    except Exception as e:
        # include the actual error so failures are diagnosable
        logger.LogError('Something Not good with URL: {}'.format(str(e)))
        details = {}
    finally:
        # always release the browser, even when scraping failed
        if driver is not None:
            driver.quit()

def validating(link):
    """Return True when *link* is a standard YouTube watch URL."""
    youtube_url = "https://www.youtube.com/watch?v="
    return link.startswith(youtube_url)


# Queue the link supplied on the command line, provided it is a valid
# YouTube watch URL.
# FIX: dropped the dead `isValid = True` assignment (it was immediately
# overwritten) and use `link` consistently instead of re-reading args.link.
link = args.link

if validating(link):
    try:
        # starting connection to redis
        redis = connection()

        # to push into redis db
        redis.lpush(os.getenv('REDIS_YOUTUBE_VIDEO_LIST'), link)
        logger.LogInfo("youtube link queued")
    except Exception as e:
        logger.LogError("error: {}".format(str(e)))
else:
    logger.LogError("Link is not valid")