def extract_data(self, url):
    """Scrape title, counters and upload date from a video watch page.

    The page is requested with an English ``accept-language`` header and a
    five second timeout. Connection errors and timeouts are not fatal: a
    message is printed and the request is repeated until it succeeds.

    Args:
        url: Address of the watch page to download and parse.

    Returns:
        A dict with the keys ``title``, ``views``, ``likes``, ``dislikes``,
        ``vpd`` (views per day since upload) and ``upload_date`` (a
        ``datetime.date``). ``None`` is returned when reading the view
        count raises ``IndexError``, e.g. because the view-count element
        is missing from the page.
    """
    headers = {"accept-language": "en-us"}

    # Repeat the request until a response arrives.
    while True:
        try:
            page = requests.get(url, headers=headers, timeout=5.000)
            break
        except (ConnectionError, ReadTimeout, Timeout):
            print("Worker " + str(self.id) + " is reloading...")

    parsed = BeautifulSoup(page.content, "html.parser")
    date_nodes = parsed.select("strong.watch-time-text")
    title_nodes = parsed.select("span.watch-title")
    view_nodes = parsed.select("div.watch-view-count")
    sentiment_nodes = parsed.select('button[data-position="bottomright"] span')

    title = title_nodes[0].get_text().strip()

    try:
        # Only the first space-separated word of the counter text is the number.
        views = common.toInt(view_nodes[0].get_text().strip().split(" ")[0], ",")
    except IndexError as err:
        print("Length of elements: ", len(view_nodes))
        if len(view_nodes) > 0:
            print(view_nodes[0].get_text())
        print(err)
        return None

    # The like and dislike counters are the first and third matched spans.
    likes = common.toInt(sentiment_nodes[0].get_text().strip(), ",")
    dislikes = common.toInt(sentiment_nodes[2].get_text().strip(), ",")

    # The last three words of the date text are read as month, day, year.
    date_words = date_nodes[0].get_text().strip().split(" ")
    upload_date = datetime.date(
        int(date_words[-1]),
        common.get_month(date_words[-3]),
        common.toInt(date_words[-2], ","),
    )

    # A timedelta never equals the integer 0, so the day count itself has to
    # be tested; otherwise a video uploaded today divides by zero. A negative
    # count (upload date ahead of the local date) is treated the same way.
    days_online = (datetime.date.today() - upload_date).days
    if days_online <= 0:
        vpd = views
    else:
        vpd = int(math.floor(views / days_online))

    return {
        "title": title,
        "views": views,
        "likes": likes,
        "dislikes": dislikes,
        "vpd": vpd,
        "upload_date": upload_date,
    }
def get_channel_start_date(driver, url, silent=False):
    """Return the date found in a channel's "about" information.

    With ``silent`` set, ``url + "/about"`` is downloaded with ``requests``
    and the statistic whose text starts with "Joined" is used. Otherwise
    the ABOUT tab is opened through ``driver`` and the text of the second
    element matched by the metadata XPath is used.

    Args:
        driver: Browser driver passed to ``open_channel_tab`` and
            ``get_elements``; only used when ``silent`` is false.
        url: Channel base URL; only used when ``silent`` is true.
        silent: Choose the plain HTTP request instead of the driver.

    Returns:
        A ``datetime.date`` built from the second (month), third (day) and
        fourth (year) space-separated words of the selected text.
    """
    words = None

    if not silent:
        open_channel_tab(driver, "ABOUT")
        date_xpath = "//div[@id='right-column']/yt-formatted-string[contains(@class, 'ytd-channel-about-metadata-renderer')]"
        matched = get_elements(driver, By.XPATH, date_xpath)
        words = matched[1].text.split(" ")
    else:
        request_headers = {"accept-language": "en-us"}
        response = requests.get(url + "/about", headers=request_headers)
        soup = BeautifulSoup(response.content, "html.parser")
        stat_nodes = soup.select(
            'ul#browse-items-primary li div.about-metadata-container div.about-stats span.about-stat'
        )
        # No early exit: when several statistics qualify, the last one wins.
        for node in stat_nodes:
            text = node.get_text()
            if text.startswith("Joined"):
                words = text.split(" ")

    day = common.toInt(words[2])
    month = common.get_month(words[1])
    year = int(words[3])
    return datetime.date(year, month, day)
def _parseA(line):
    """Parse a line as a type 'A' instruction.

    Args:
        line: Text that must match ``_REGEX_A`` in full.

    Returns:
        A dict ``{'type': 'A', 'value': <first regex group>}``; the value is
        kept as the matched string.

    Raises:
        ParserException: If the line does not match ``_REGEX_A``, or if the
            captured group converts to an integer greater than
            ``MAX_A_VALUE``.
    """
    parsed = _REGEX_A.fullmatch(line)
    if parsed is None:
        raise ParserException(_MSG_INVALID_A)

    operand = parsed.group(1)

    # A None result from toInt skips the range check below.
    number = toInt(operand)
    if number is not None and number > MAX_A_VALUE:
        raise ParserException(_MSG_INVALID_A_VALUE)

    return {'type': 'A', 'value': operand}
def _addA(self, instruction):
    """Append the encoding of a type 'A' instruction to ``self.instructions``.

    Args:
        instruction: Mapping whose ``'value'`` entry holds the operand.

    Three cases are handled:
      * the operand converts to an integer: ``'0'`` followed by its 15-digit
        ``toBin`` form is appended;
      * the operand is a key of ``self._SYMBOLS_TABLE``: the table entry is
        encoded the same way;
      * otherwise a placeholder ``PREFIX_A + operand`` is appended and the
        current ``self.instructionCounter`` is recorded in
        ``self.unresolvedInstructions``.
    """
    operand = instruction['value']
    number = toInt(operand)

    if number is not None:
        self.instructions.append('0' + toBin(number, 15))
    elif operand in self._SYMBOLS_TABLE:
        self.instructions.append('0' + toBin(self._SYMBOLS_TABLE[operand], 15))
    else:
        # Mark the entry as pending ...
        self.instructions.append(PREFIX_A + operand)
        # ... and remember its position so the address can be resolved later.
        self.unresolvedInstructions.append(self.instructionCounter)
def classify_msg_by_id(self):
    """Classify messages by their unique ID.

    Every line of ``self.content`` that matches ``cmn.lint_msg_pat`` is
    recorded twice: in ``self.messages`` under its message ID (as a
    ``LintMsg``) and in ``self.files`` under its file name (as a ``File``).
    The resulting collections can guide code changes and statistical
    analysis.
    """
    for line in self.content.split('\n'):
        found = re.search(cmn.lint_msg_pat, line)
        if not found:
            continue

        msg_id = cmn.toInt(found.group(0))

        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if msg_id not in self.messages:
            # First time this message ID is seen: create its LintMsg.
            self.messages[msg_id] = LintMsg(msg_id)

        ftype, fname, line_num, desc = self.parse_line(line)
        self.messages[msg_id].add_record(ftype, fname, line_num, desc)

        if fname not in self.files:
            self.files[fname] = File(fname)
        self.files[fname].add_record(msg_id, line_num,
                                     cmn.getMsgTypeByID(msg_id))
# NOTE(review): the next statement is the tail of a definition that starts
# before this chunk; its enclosing block and real indentation are not visible
# here - confirm against the full file.
common.addDirItem('Refresh', '', 'DefaultFolder.png', params)


def __listStreams(params):
    """Add one directory item per stream of an event, then a 'Refresh' item.

    Reads the module-level ``db`` and ``mode``. The events object is loaded
    from ``db['events']``, passed to ``hockeystreams.eventStreams`` and then
    stored back under the same key.

    ``params`` must contain ``'homeTeam'`` and ``'awayTeam'``; both are
    copied into the parameters of the trailing 'Refresh' item.
    """
    eventsObj = db['events']
    # The third argument is True only when the current mode is STREAMS_REFRESH.
    # NOTE(review): presumably this forces a reload of the streams - confirm
    # in hockeystreams.eventStreams.
    streams = hockeystreams.eventStreams(params, eventsObj, True if mode == common.Mode.STREAMS_REFRESH else False)
    db['events'] = eventsObj # write back to db
    for stream in streams:
        common.addDirItem(stream.getTitle(), stream.src, 'DefaultVideo.png', '')
    # Rebuild the parameters for the 'Refresh' item with the refresh mode set.
    params = {'homeTeam': params['homeTeam'], 'awayTeam': params['awayTeam'], 'mode': common.Mode.STREAMS_REFRESH}
    # params.update({'mode': common.Mode.STREAMS_REFRESH})
    common.addDirItem('Refresh', '', 'DefaultFolder.png', params)


# Module-level start-up: open the shelve database, then decode the query
# string of sys.argv[2] into the request parameters and the current mode.
db = shelve.open(common.__databasefile__)
common.log(common.__databasefile__)
params = dict(parse_qsl(urlparse(sys.argv[2]).query, keep_blank_values=True))
# common.Mode.UNKNOWN is handed to toInt as the third argument.
# NOTE(review): presumably the fallback when 'mode' is absent or not numeric -
# confirm in common.toInt.
mode = common.toInt(params, 'mode', common.Mode.UNKNOWN)
common.log('Current mode: ' + str(mode))
# Any failure to read the stored settings falls back to a fresh Settings().
# NOTE(review): the bare except also hides errors other than a missing key.
try:
    settingsObj = db['settings']
except:
    settingsObj = hockeystreams.Settings()
    pass
# Same fallback for the stored login object.
try:
    loginObj = db['login']
except:
    loginObj = hockeystreams.Login()
    pass