Exemple #1
0
    def extract_data(self, url):
        """Download the page at ``url`` and scrape its statistics.

        The request is retried, with no upper bound, whenever it fails with
        a connection error or a timeout (each attempt times out after five
        seconds).

        Args:
            url: Address of the page to download and parse.

        Returns:
            A dict with the keys ``title``, ``views``, ``likes``,
            ``dislikes``, ``vpd`` (views per day since the upload date) and
            ``upload_date`` (a ``datetime.date``), or ``None`` when reading
            the view count raises ``IndexError``.
        """
        # NOTE(review): presumably English is requested so that the date text
        # parsed below uses month names common.get_month understands.
        headers = {
            "accept-language": "en-us"
        }
        while True:
            try:
                page = requests.get(url, headers=headers, timeout=5.000)
            except (ConnectionError, ReadTimeout, Timeout):
                print("Worker " + str(self.id) + " is reloading...")
            else:
                break
        parsed = BeautifulSoup(page.content, "html.parser")

        ldate = list(parsed.select("strong.watch-time-text"))
        lTitle = list(parsed.select("span.watch-title"))
        lView = list(parsed.select("div.watch-view-count"))
        lSentiment = list(
            parsed.select('button[data-position="bottomright"] span'))

        title = lTitle[0].get_text().strip()
        try:
            # The view count is the first space-separated token of the text.
            views = common.toInt(lView[0].get_text().strip().split(" ")[0],
                                 ",")
        except IndexError as err:
            print("Length of elements: ", len(lView))
            if len(lView) > 0:
                print(lView[0].get_text())
            print(err)
            return None
        # Matched spans 0 and 2 carry the like and dislike counts.
        likes = common.toInt(lSentiment[0].get_text().strip(), ",")
        dislikes = common.toInt(lSentiment[2].get_text().strip(), ",")

        # The last three tokens of the date text are month, day and year.
        sdate = ldate[0].get_text().strip().split(" ")
        slen = len(sdate)
        upload_date = datetime.date(int(sdate[slen - 1]),
                                    common.get_month(sdate[slen - 3]),
                                    common.toInt(sdate[slen - 2], ","))

        # Compare the day count, not the timedelta itself: a timedelta never
        # equals the integer 0, so the old check let a same-day upload reach
        # the division below and raise ZeroDivisionError.  A non-positive
        # count (upload date not before today) is treated as a single day.
        elapsed_days = (datetime.date.today() - upload_date).days
        if elapsed_days <= 0:
            vpd = views
        else:
            vpd = int(math.floor(views / elapsed_days))

        return {
            "title": title,
            "views": views,
            "likes": likes,
            "dislikes": dislikes,
            "vpd": vpd,
            "upload_date": upload_date
        }
Exemple #2
0
def get_channel_start_date(driver, url, silent=False):
    """Return the start ("Joined") date of a channel as a ``datetime.date``.

    Args:
        driver: Browser driver handed to ``open_channel_tab`` and
            ``get_elements``; only used when ``silent`` is false.
        url: Channel address; ``"/about"`` is appended to it when ``silent``
            is true.
        silent: When true, download the about page with ``requests`` and
            scrape it; otherwise open the ABOUT tab through ``driver`` and
            read the date from the second matching element.

    Returns:
        The date built from the second (month), third (day) and fourth
        (year) space-separated tokens of the date text.
    """
    date_tokens = None
    if not silent:
        open_channel_tab(driver, "ABOUT")
        date_xpath = "//div[@id='right-column']/yt-formatted-string[contains(@class, 'ytd-channel-about-metadata-renderer')]"
        candidates = get_elements(driver, By.XPATH, date_xpath)
        date_tokens = candidates[1].text.split(" ")
    else:
        response = requests.get(url + "/about",
                                headers={"accept-language": "en-us"})
        soup = BeautifulSoup(response.content, "html.parser")
        selector = 'ul#browse-items-primary li div.about-metadata-container div.about-stats span.about-stat'
        # No early exit: when several stats start with "Joined", the last
        # one encountered is the one that is used.
        for stat in soup.select(selector):
            text = stat.get_text()
            if text.startswith("Joined"):
                date_tokens = text.split(" ")

    day = common.toInt(date_tokens[2])
    month = common.get_month(date_tokens[1])
    return datetime.date(int(date_tokens[3]), month, day)
Exemple #3
0
def _parseA(line):
    """Parse ``line`` as an 'A' instruction.

    Args:
        line: Text that must match ``_REGEX_A`` in full.

    Returns:
        A dict ``{'type': 'A', 'value': <text>}`` where ``<text>`` is the
        first capture group of the match, kept as a string.

    Raises:
        ParserException: If ``line`` does not match ``_REGEX_A``, or if the
            captured value converts to an integer above ``MAX_A_VALUE``.
    """
    match = _REGEX_A.fullmatch(line)
    if match is None:
        raise ParserException(_MSG_INVALID_A)

    operand = match.group(1)

    # Only operands that toInt converts are range-checked; a None result
    # (presumably a non-numeric operand such as a symbol) passes through.
    value = toInt(operand)
    if value is not None and value > MAX_A_VALUE:
        raise ParserException(_MSG_INVALID_A_VALUE)

    return {
        'type': 'A',
        'value': operand,
    }
Exemple #4
0
    def _addA(self, instruction):
        """Append the encoding of an 'A' instruction to ``self.instructions``.

        Three cases are handled:

        * the value converts to an integer: ``'0'`` followed by
          ``toBin(value, 15)`` is appended;
        * the value is a key of ``self._SYMBOLS_TABLE``: the mapped entry is
          encoded the same way;
        * otherwise ``PREFIX_A + value`` is appended as a placeholder and
          ``self.instructionCounter`` is recorded in
          ``self.unresolvedInstructions`` so the address can be resolved
          later.

        Args:
            instruction: Mapping whose ``'value'`` entry holds the operand.
        """
        value = instruction['value']
        valueInt = toInt(value)

        if valueInt is not None:
            self.instructions.append('0' + toBin(valueInt, 15))
            return

        if value in self._SYMBOLS_TABLE:
            self.instructions.append('0' +
                                     toBin(self._SYMBOLS_TABLE[value], 15))
        else:
            # Signalizes as pending.
            self.instructions.append(PREFIX_A + value)

            # Resolves the address later.
            self.unresolvedInstructions.append(self.instructionCounter)
Exemple #5
0
    def classify_msg_by_id(self):
        """
        Classify messages by their unique ID. This method
        will generate a collection of 'LintMsg' objects which
        can guide us to modify the code and perform statistical analysis.

        Every line of ``self.content`` that matches ``cmn.lint_msg_pat`` is
        recorded twice: under its message ID in ``self.messages`` and under
        its file name in ``self.files``.
        """
        for line in self.content.split('\n'):
            m = re.search(cmn.lint_msg_pat, line)
            if not m:
                continue

            msg_id = cmn.toInt(m.group(0))
            # Membership is tested with 'in' because dict.has_key() no
            # longer exists in Python 3; 'in' behaves the same on Python 2.
            if msg_id not in self.messages:
                ## If this is the first time identifying the
                ## message id, create a 'LintMsg' object for it.
                self.messages[msg_id] = LintMsg(msg_id)

            ftype, fname, line_num, desc = self.parse_line(line)
            self.messages[msg_id].add_record(ftype, fname,
                                             line_num, desc)

            if fname not in self.files:
                self.files[fname] = File(fname)
            self.files[fname].add_record(msg_id, line_num,
                                         cmn.getMsgTypeByID(msg_id))
Exemple #6
0
    common.addDirItem('Refresh', '', 'DefaultFolder.png', params)

def __listStreams(params):
    """Add one directory item per stream, followed by a 'Refresh' item.

    The streams come from ``hockeystreams.eventStreams``, which receives
    ``params``, the ``'events'`` object stored in ``db`` and a flag telling
    whether the module-level ``mode`` equals ``common.Mode.STREAMS_REFRESH``.
    The events object is stored back into ``db`` after the call.

    Args:
        params: Mapping that must contain ``'homeTeam'`` and ``'awayTeam'``.
    """
    eventsObj = db['events']
    refresh_requested = mode == common.Mode.STREAMS_REFRESH
    streams = hockeystreams.eventStreams(params, eventsObj, refresh_requested)
    db['events'] = eventsObj  # write back to db

    for stream in streams:
        common.addDirItem(stream.getTitle(), stream.src, 'DefaultVideo.png', '')

    # The refresh item gets only the two team entries plus the refresh mode;
    # any other key of the incoming params is left out.
    refresh_params = {
        'homeTeam': params['homeTeam'],
        'awayTeam': params['awayTeam'],
        'mode': common.Mode.STREAMS_REFRESH,
    }
    common.addDirItem('Refresh', '', 'DefaultFolder.png', refresh_params)

# Open the shelve database and decode the query string passed as the third
# command-line argument into the request parameters.
db = shelve.open(common.__databasefile__)
common.log(common.__databasefile__)
params = dict(parse_qsl(urlparse(sys.argv[2]).query, keep_blank_values=True))
mode = common.toInt(params, 'mode', common.Mode.UNKNOWN)

common.log('Current mode: ' + str(mode))

# Load the stored settings and login objects, falling back to fresh ones
# when an entry is missing or cannot be loaded.  'except Exception' keeps
# that best-effort fallback but, unlike a bare 'except', lets
# KeyboardInterrupt and SystemExit propagate.
try:
    settingsObj = db['settings']
except Exception:
    settingsObj = hockeystreams.Settings()

try:
    loginObj = db['login']
except Exception:
    loginObj = hockeystreams.Login()