def main():
    """Collect a YouTube video sample, parse the titles, and report average views.

    Fix: the original used Python 2 `print` statements, which are a
    SyntaxError alongside the Python 3 `print()` calls used elsewhere
    in this project; converted to Python 3 form (output is unchanged).
    """
    # Grab a sample of 100 videos' stats from the YouTube collector.
    stats = youtube.grabYouTubeSample(100)
    print(len(stats))
    print(stats)
    print("FINISHED COLLECTION")
    print("STARTING PARSING")
    TitleParser.parse_videos(stats)
    print("COMPUTING AVERAGES")
    TitleParser.compute_average_views()
    print("COMPLETE")
def process_stream(submission, reddit):
    """Scan a reddit submission title and alert the configured redditor
    for every known manga title it mentions.

    Only processes submissions whose title contains 'DISC' (discussion
    threads). Fixes: `title.upper()` was computed twice, and the loop
    ended in a redundant `continue` (a no-op as the last statement).

    Args:
        submission: a praw-style submission with a `.title` attribute.
        reddit: a praw-style client providing `.redditor(name)`.
    """
    # Uppercase once; both the 'DISC' filter and the manga matching
    # below operate on the uppercased title.
    title = submission.title.upper()
    if 'DISC' not in title:
        return
    print(db.find_subscribers(tp.title(title)))
    print(title)
    for manga in db.all_manga():
        if manga in title:
            send_alert(submission, reddit.redditor(config.REDDITOR))
            print("alert {}".format(title))
def ScrapeLinkset(self):
    """Visit every link in self.__links, scrape the pinned image plus its
    title/source/caption metadata, and persist anything that passes the
    size filter.

    For each link: locate the image element, resolve a high-resolution
    URL, check it against the configured minimum bounds, then download
    the image and write metadata/CSV rows. Per-element lookups are
    best-effort: a missing title/source/caption degrades to 'N/A'
    rather than aborting the link.

    Fix: three bare `except:` clauses were narrowed — bare `except`
    also swallows KeyboardInterrupt/SystemExit, making the scraper
    impossible to interrupt cleanly.
    """
    loopCount = 1
    successCount = 1
    captionContent = ''
    titleContent = ''
    srcContent = ''
    doesTitleExist = False
    for link in self.__links:
        self._browser.get(link)
        # Image filename is derived from the search keyword plus a
        # running success counter (only incremented on real downloads).
        imageName = self.__keyword.replace(" ", "_") + '_%d.jpg' % (successCount)
        print('(%d/%d): ' % (loopCount, len(self.__links)) + link)
        loopCount += 1
        # Getting image download links
        image = self.__wait.until(
            EC.presence_of_element_located((
                By.CSS_SELECTOR,
                "div[class='Pj7 sLG XiG eEj m1e'] > div[class='XiG zI7 iyn Hsu'] > img"
            )))
        try:
            print('Initial request: ' + image.get_attribute('src'))
            imageLink = self.__GetHighResImage(image.get_attribute('src'))
            print('Final Request: ' + imageLink)
            if (ImageFilter.IsImageGreaterThanBounds(
                    imageLink, self.__horizontalMin, self.__verticalMin)):
                # Get title
                try:
                    title = self.__wait.until(
                        EC.presence_of_element_located(
                            (By.CSS_SELECTOR,
                             "h1[class='lH1 dyH iFc ky3 pBj DrD IZT']")))
                    titleContent = title.text
                    doesTitleExist = True
                except TimeoutException:
                    doesTitleExist = False
                    titleContent = 'N/A'
                print('\nTitle content:\n\n' + titleContent)
                # Get source
                try:
                    source = self.__wait.until(
                        EC.presence_of_element_located((
                            By.CSS_SELECTOR,
                            "div[class='Jea jzS zI7 iyn Hsu'] a[class='linkModuleActionButton']"
                        )))
                    srcContent = source.get_attribute('href')
                except Exception:  # was bare except: keep best-effort, stay interruptible
                    srcContent = 'N/A'
                print('\nSource content:\n\n' + srcContent)
                # Get caption
                print('\nCaption content:\n')
                try:
                    caption = self.__wait.until(
                        EC.presence_of_element_located((
                            By.CSS_SELECTOR,
                            "span[class='tBJ dyH iFc MF7 pBj DrD IZT swG']"
                        )))
                    captionContent = caption.text
                except TimeoutException:
                    captionContent = 'N/A'
                print(captionContent)
                # Fall back to deriving a title from the source URL when
                # the page itself had no title element.
                if (titleContent == 'N/A' and srcContent != 'N/A'):
                    try:
                        titleContent = TitleParser.GetTitle(srcContent)
                    except Exception:  # was bare except
                        titleContent = 'N/A'
                # Write image to directory
                imageSuccess = self.__DownloadImage(imageLink, imageName)
                # Write caption to captions.txt in directory
                if (imageSuccess):
                    successCount += 1
                    captionSuccess = self.__WriteToMetadataFile(
                        imageName, titleContent, srcContent, captionContent)
                    if (captionSuccess):
                        # CSV gets a 20-char title; use the caption when
                        # no real title was found.
                        if (not doesTitleExist):
                            self.__WriteToCSVFile(imageName, captionContent[0:20], link)
                        else:
                            self.__WriteToCSVFile(imageName, titleContent[0:20], link)
            else:
                print('Image not greater than bounds: ' + imageLink)
        except Exception:  # was bare except
            print('No image found (src = NULL)')
        # Blank separator between per-link log sections.
        # NOTE(review): source formatting was lost; these prints appear
        # to run once per link — confirm against the original layout.
        print()
        print()
# NOTE(review): this line is a collapsed/truncated chunk. It begins
# mid-way through a `fixViewCount`-style function (the loop/if-chain
# head is outside this view, so the code below cannot safely be
# reconstructed), then defines fixNullByte() — which rewrites
# 'YouTubeData5.csv' to "NoNull<name>" with NUL ('\x00') bytes stripped —
# and ends with module-level driver calls into TitleParser.
# The commented-out calls (#fixNullByte() / #fixViewCount()) are one-off
# data-repair steps left disabled. Left byte-identical pending the
# original formatting.
row['viewCount'] = '1,000,000+' data.append(row) with open("Fixed" + readFile, 'w', encoding='utf8', newline='') as output_file: keys = data[0].keys() writer = csv.DictWriter(output_file, keys) writer.writeheader() writer.writerows(data) def fixNullByte(): readFile = 'YouTubeData5.csv' data = '' with open(readFile) as fd: data = fd.read() with open("NoNull" + readFile, 'w') as fo: fo.write(data.replace('\x00', '')) #fixNullByte() #fixViewCount() s = grabYouTubeSample() TitleParser.parse_videos(s) TitleParser.compute_average_views() TitleParser.gen_compute_average_views(TitleParser.description_dict, 'DescriptionData.csv') TitleParser.gen_compute_average_views(TitleParser.tag_dict, 'TagData.csv')
# NOTE(review): near-duplicate of the previous chunk, with more of the
# view-count bucketing `elif` chain visible (<=1,000,000 → '100,001-1,000,000',
# else '1,000,000+'). It still begins mid-`if`-chain — the opening `if`
# and enclosing function head are outside this view — so it cannot be
# reconstructed into standalone valid code from here. Appears to be a
# second extraction of the same file region; consider deduplicating at
# the source. Left byte-identical pending the original formatting.
row['viewCount'] = '10,001-100,000' elif row['viewCount'] <= 1000000: row['viewCount'] = '100,001-1,000,000' else: row['viewCount'] = '1,000,000+' data.append(row) with open("Fixed"+readFile,'w',encoding='utf8',newline='') as output_file: keys = data[0].keys() writer = csv.DictWriter(output_file, keys) writer.writeheader() writer.writerows(data) def fixNullByte(): readFile = 'YouTubeData5.csv' data = '' with open(readFile) as fd: data = fd.read() with open("NoNull"+readFile, 'w') as fo: fo.write(data.replace('\x00', '')) #fixNullByte() #fixViewCount() s = grabYouTubeSample() TitleParser.parse_videos(s) TitleParser.compute_average_views() TitleParser.gen_compute_average_views(TitleParser.description_dict, 'DescriptionData.csv') TitleParser.gen_compute_average_views(TitleParser.tag_dict, 'TagData.csv')