def scrap_vidbull(self, episode, video_id):
    """Scrape the vidbull.com page for *video_id* and register media for *episode*.

    Submits the two redirect forms vidbull serves before exposing the video,
    extracts the .mp4 URL, records an HttpMediaFile for the episode and points
    the episode at its external .srt subtitle file.

    :param episode: episode model instance; ``subtitles`` is set and ``save()`` called.
    :param video_id: vidbull video identifier (may carry an extension, e.g. "abc.html").
    """
    # NOTE: a leftover `import ipdb; ipdb.set_trace()` debugger breakpoint was
    # removed from the top of this function.
    url = "http://vidbull.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # First gate: the form named "F1" must be re-submitted with its hidden fields.
    data = {}
    for elem in soup.find('form', attrs={'name': 'F1'})('input'):
        if elem.get('name'):
            data[elem.get('name')] = elem.get('value')
    html = urllib.urlopen(url, urllib.urlencode(data)).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # Second gate: re-submit the first form on the follow-up page the same way.
    data = {}
    for elem in soup.form('input'):
        if elem.get('name'):
            data[elem.get('name')] = elem.get('value')
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # TODO(review): video_url is extracted but never stored anywhere —
    # presumably it should be attached to the HttpMediaFile; confirm intent.
    video_url = re.findall('a href="(.*?mp4)"', html)[0]
    media = HttpMediaFile(episode=episode)
    media.save()
    # Subtitle files are keyed by the id without its extension.
    episode.subtitles = "http://sc.miratuserie.com/episodes/subs/%s.srt" % video_id.split('.')[0]
    episode.save()
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Fetch the score-entry page for this assessment.
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Pull the maximum possible score out of the page header and flag any
    # submitted score that exceeds it (e.g. an 11/10 typo in the CSV).
    returnOutput = {}
    totalCount = re.compile('<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>')
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case: score was never entered — treat as -1 so it can't trip the check.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # Walk the edit form: each student row is an "student*" input followed by
    # hidden inputs carrying the Gradesource number and the row id.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)"')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        studStr = studentNumber.search(str(x)).group(1).strip()
        # The hidden input directly after the student field holds the number.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        idString = "id" + str(idNumber.search(str(x)).group(1).strip())
        updateIDDict[idString] = gradStr

    # Inner-join savedAccount (gradesourceNumber -> email) with
    # scoreDict (email -> score) to get gradesourceNumber -> score.
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Grabs the website
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Grabs the max score and runs a check on if any scores are over the maximum
    returnOutput = {}
    totalCount = re.compile('<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>')
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case in which the score wasn't inputted — -1 can never exceed the max.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            # Throw warning, in case someone has a score of 11/10; they're not recorded.
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # nomnom soup magic: scrape the edit form row by row.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)"')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        # Grabs the student number from the visible input.
        studStr = studentNumber.search(str(x)).group(1).strip()
        # Grabs the gradesource number from the hidden input that follows it.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        # Grabs the row id number from the same hidden input.
        idString = "id" + str(idNumber.search(str(x)).group(1).strip())
        updateIDDict[idString] = gradStr

    # Some inner-join magic — join savedAccount (gradesourceNumber -> email)
    # with scoreDict (email -> score) into (gradesourceNumber -> score).
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def updateScores(self, field, scoreDict):
    """Fetch the Gradesource score-entry page for assessment *field* and build
    the POST/id mappings needed to submit the scores in *scoreDict*.

    :param field: Gradesource assessment id (interpolated into the edit URL).
    :param scoreDict: mapping of student email -> score string ("" = no score).
    Prints a warning for every score above the page's stated maximum, and for
    every Gradesource account with no matching CSV entry.
    """
    s = self.s
    print("Updating scores...")
    # Grabs the website
    html = s.get('https://www.gradesource.com/editscores1.asp?id=%s' % field,
                 cookies=self.cookies).content

    # Grabs the max score and runs a check on if any scores are over the maximum
    returnOutput = {}
    totalCount = re.compile(
        '<td nowrap colspan=3 class=BT> Maximum Points: <font color="#336699"><b>(.*)</b></font></td>'
    )
    maximumScore = totalCount.search(html).group(1).strip()
    maxScore = float(maximumScore)  # hoisted: invariant across the loop
    for k, v in scoreDict.items():
        # Edge case in which the score wasn't inputted — -1 can never exceed the max.
        value = -1 if v == "" else float(v)
        if value > maxScore:
            # Throw warning, in case someone has a score of 11/10; they're not recorded.
            print(k + " has a score of " + v + " which is larger than the maximum score of " + maximumScore)
            returnOutput[k] = v

    # nomnom soup magic: scrape the edit form row by row.
    nomnomsoup = BeautifulSoup(html)
    updatePOSTDict = {}
    updateIDDict = {}
    # Compile the row-scraping patterns once, not on every iteration.
    studentNumber = re.compile('input id="(.*)" name=')
    gradesourceNumber = re.compile('type="hidden" value="(.*)">')
    idNumber = re.compile('input name="id(.*)" type="hidden"')
    for x in nomnomsoup.form('input', id=re.compile("^student")):
        # Grabs the student number from the visible input.
        studStr = studentNumber.search(str(x)).group(1).strip()
        # Grabs the gradesource number from the hidden input that follows it.
        # NOTE(review): this revision had dropped the findNext() step that the
        # other two copies of this method perform; without it the hidden-input
        # patterns run against the visible student field — restored to match.
        x = x.findNext("input")
        gradStr = gradesourceNumber.search(str(x)).group(1).strip()
        updatePOSTDict[studStr] = gradStr
        # Grabs the row id number from the same hidden input.
        idString = idNumber.search(str(x))
        updateIDDict[str("id" + idString.group(1).strip())] = gradStr

    # Some inner-join magic — join savedAccount (gradesourceNumber -> email)
    # with scoreDict (email -> score) into (gradesourceNumber -> score).
    joinedDictA = {}
    saveAccount = self.savedAccount
    for key in saveAccount.keys():
        try:
            joinedDictA[key] = scoreDict[saveAccount[key]]
        except KeyError:  # was a blanket `except Exception` — only the dict lookup can fail here
            print(saveAccount[key] + " was found in Gradesource but not in the CSV.")
            continue
def get_file(self, video_id):
    """Resolve the direct .mp4 URL for *video_id* on amonshare.com.

    Submits the site's two interstitial forms (each page's first form, with
    all named inputs echoed back) and then scrapes the final page for the
    video link.

    :param video_id: amonshare file identifier appended to the base URL.
    :returns: absolute URL of the .mp4 file.
    """
    url = "http://amonshare.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding="latin-1")

    # First interstitial form: re-submit every named input unchanged.
    data = {}
    for elem in soup.form("input"):
        if elem.get("name"):
            data[elem.get("name")] = elem.get("value")
    html = urllib.urlopen(url, urllib.urlencode(data)).read()
    soup = BeautifulSoup(html, from_encoding="latin-1")

    # Second interstitial form, same drill.
    data = {}
    for elem in soup.form("input"):
        if elem.get("name"):
            data[elem.get("name")] = elem.get("value")
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # Fix: the dot before "mp4" was unescaped, so it matched any character
    # (e.g. href="...xmp4"); escape it to anchor on a literal ".mp4".
    video_url = re.findall(r'href="([^"]+)\.mp4"', html)[0] + ".mp4"
    return video_url
def get_file(self, video_id):
    """Resolve the direct video URL for *video_id* on filebox.com.

    Re-submits the page's first form (echoing back every named input) and
    scrapes the resulting page for the argument of the this.play('...') call.

    :param video_id: filebox file identifier appended to the base URL.
    :returns: the video URL string.
    """
    url = "http://filebox.com/%s" % video_id
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding='latin-1')

    # Echo back all named inputs of the interstitial form.
    data = dict(
        (field.get('name'), field.get('value'))
        for field in soup.form('input')
        if field.get('name')
    )
    html = urllib.urlopen(url, urllib.urlencode(data)).read()

    # The player is invoked as this.play('<url>') — capture its argument.
    return re.findall("this.play\('([^']+)'\)", html)[0]