def Closure(r, data):
    """Handle one fetch result: report a failure or parse and deliver the data.

    ``err``, ``cb`` and ``url`` are not parameters of this function; they are
    free names looked up outside of it when the function runs.

    Args:
        r: The response object, or an ``Exception`` instance.
        data: The payload fed to a new ``WebParser`` when ``r`` is acceptable.

    Returns:
        None in every case.
    """
    request_failed = isinstance(r, Exception) or r.status != 200
    if request_failed:
        # The error handler is optional; it is only called when truthy.
        if err:
            err(r)
        return

    parser = WebParser()
    parser.feed(data)
    cb(parser.result, url)
def __init__(self, username, password):
    """
    Initializes a VerseBot object.

    The ``username`` and ``password`` arguments are accepted but are not read
    anywhere in this initializer: the reddit session is set up through
    ``OAuth2Util``. It is recommended that the username and password are
    stored in something like an environment variable for security reasons.

    Steps performed, in order: configure logging, connect to reddit, connect
    to the database, build the web parser, refresh the translation list
    table and clean old user translation entries.

    Raises:
        SystemExit: with exit status 1 when the reddit connection fails.
    """
    logging.basicConfig(level=LOG_LEVEL)
    self.log = logging.getLogger("versebot")
    logging.getLogger("requests").setLevel(logging.WARNING)

    try:
        self.log.info("Connecting to reddit...")
        self.r = praw.Reddit(user_agent=(
            "VerseBot by /u/mgrieger. GitHub: https://github.com/matthieugrieger/versebot"
        ))
        self.o = OAuth2Util.OAuth2Util(self.r)
        self.o.refresh(force=True)
    except Exception as err:
        self.log.critical("Exception: %s", err)
        self.log.critical("Connection to reddit failed. Exiting...")
        # Raise SystemExit directly: the exit() helper is installed by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)
    self.log.info("Successfully connected to reddit!")

    self.log.info("Connecting to database...")
    database.connect(self.log)  # Initialize connection to database.
    self.log.info("Successfully connected to database!")

    # Initialize web parser with updated translation list.
    self.parser = WebParser()
    self.log.info("Updating translation list table...")
    database.update_translation_list(self.parser.translations)
    self.log.info("Translation list update successful!")

    self.log.info("Cleaning old user translation entries...")
    database.clean_user_translations()
    self.log.info("User translation cleaning successful!")
def _parsePage(self, node, resp, parseType):
    """Fill ``node`` from a fetched response and report how it went.

    Args:
        node: Object receiving the results through ``setTitle``,
            ``setKeywordStatus``, ``setCrawledStatus``, ``setError`` and the
            ``urlList`` attribute.
        resp: ``None``, a string (the text of an earlier exception), or an
            object exposing the page markup as ``resp.html``.
        parseType: When equal to 0 the page URLs are collected as well;
            otherwise only the title and keyword are handled.

    Returns:
        ``resp`` itself (a string or ``None``) when there was no page to
        parse, 0 after a successful parse, 1 when parsing raised.

    Raises:
        SystemExit: with status 1 when a KeyboardInterrupt arrives while
            parsing.
    """
    try:
        # A string response means there was an exception and its text is
        # used as the node title; None means nothing usable came back.
        if resp is None or isinstance(resp, str):
            if resp is None:
                node.setTitle("Invalid, broken, or otherwise unreachable URL")
            else:
                node.setTitle(resp)
            return resp

        html = resp.html

        # If parseType is zero we are interested in the URLs,
        # otherwise just the title and keywords.
        if parseType == 0:
            node.urlList = WebParser.parseUrls(html)

        # Parse and assign page title
        node.setTitle(WebParser.getPageTitle(html))

        # If a keyword exists, parse for it
        if self.keywordExists:
            node.setKeywordStatus(WebParser.parseKeyword(html, self.keyword))

        node.setCrawledStatus(True)
        return 0
    except KeyboardInterrupt:
        sys.stderr.write("\nKeyboardInterrupt detected... exiting run.\n")
        sys.exit(1)
    except Exception as exc:
        # Only ordinary errors are recorded on the node; SystemExit and
        # GeneratorExit are deliberately left to propagate.
        if DEBUG:
            pdb.set_trace()
        sys.stderr.write("In parse page: " + str(exc))
        node.setError(str(type(exc)))
        node.setCrawledStatus(True)
        return 1
def __init__(self):
    """
    Initializes a VerseBot object.

    Steps performed, in order: configure logging, connect to Reddit through
    ``OAuth2Util``, connect to the database, build the web parser, refresh
    the translation list table, clean old user translation entries and
    finally call ``unhandled.start()``.

    Raises:
        SystemExit: with exit status 1 when the Reddit connection fails.
    """
    logging.basicConfig(level=LOG_LEVEL)
    self.log = logging.getLogger("versebot")
    logging.getLogger("requests").setLevel(logging.WARNING)

    # Initialize connection to Reddit
    try:
        self.log.info("Connecting to Reddit...")
        self.r = praw.Reddit(
            user_agent="VerseBot by Team VerseBot. "
            "GitHub: https://github.com/Team-VerseBot/versebot")
        self.o = OAuth2Util.OAuth2Util(self.r)
        self.o.refresh(force=True)
    except Exception as err:
        self.log.critical("Exception: %s", err)
        self.log.critical("Connection to Reddit failed. Exiting...")
        # Raise SystemExit directly: the exit() helper is installed by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)
    self.log.info("Successfully connected to reddit!")

    # Initialize connection to database
    self.log.info("Connecting to database...")
    database.connect(self.log)
    self.log.info("Successfully connected to database!")

    # Initialize web parser with updated translation list
    self.parser = WebParser()
    self.log.info("Updating translation list table...")
    database.update_trans_list(self.parser.translations)
    self.log.info("Translation list update successful!")

    self.log.info("Cleaning old user translation entries...")
    database.clean_user_trans()
    self.log.info("User translation cleaning successful!")

    # Extra measure to catch any unhandled exceptions
    unhandled.start()
def __init__(self, product_class):
    """Delegate to ``WebParser.__init__`` with the fixed argument "Amazon".

    Args:
        product_class: Forwarded unchanged as the last argument.
    """
    source_name = "Amazon"
    WebParser.__init__(self, source_name, product_class)
class ExDBManager():
    """Collect overview data for one galaxy from ned.ipac.caltech.edu.

    Constructing an instance downloads the "byname" page for the given
    galaxy name through the shared ``webParser`` and immediately scrapes it
    with ``getData``. Scalar fields stay ``None`` and the list fields stay
    short when the matching element is not found on the page.
    """

    # Class-level defaults. The list-valued names are rebound to fresh lists
    # for every instance in __init__, so instances never share scraped data.
    galaxyname = None
    url = None
    CrossIdent = []
    RA = None
    Dec = None
    RA_DEG = None
    Dec_DEG = None
    Z = []  # value, error
    VHelio = []  # km/s
    VCMB = []  # km/s
    HubbleDistance = []  # Mpc
    ObjectType = None
    Morphoogy = None
    ActivityType = None
    OtherClassification = None
    Passband = None
    Diameter = None  # kpc
    A_lambdaV = None
    A_lambdaK = None
    webParser = WebParser()
    html = None
    bsobj = None

    # Exact type string an element must have before its text is read.
    _TAG_TYPE = "<class 'bs4.element.Tag'>"
    # Text printed by printData for a value that was never filled in.
    _MISSING = 'N/A'

    def __init__(self, galaxyname):
        """Fetch and scrape the page for ``galaxyname``."""
        self.galaxyname = galaxyname
        self.url = "https://ned.ipac.caltech.edu/byname?objname=" + str(
            galaxyname)

        # Appending through self to the class-level lists would mutate state
        # shared by every instance, so each instance gets its own lists.
        self.CrossIdent = []
        self.Z = []
        self.VHelio = []
        self.VCMB = []
        self.HubbleDistance = []

        self.html, self.bsobj = self.webParser.getHtml(self.url)
        self.getData()

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    @classmethod
    def _show(cls, value):
        """Return ``value`` as text, or the placeholder when it is None."""
        return cls._MISSING if value is None else str(value)

    @classmethod
    def _show_pair(cls, pair):
        """Format a [value, error] list, tolerating missing entries."""
        value = pair[0] if len(pair) > 0 else None
        error = pair[1] if len(pair) > 1 else None
        return cls._show(value) + ' +/- ' + cls._show(error)

    def printData(self):
        """Print the scraped fields as a plain-text report on stdout.

        Values that were not found on the page are shown as ``N/A`` instead
        of raising while the report is being built.
        """
        show = self._show
        print('<Information Table Of ' + show(self.galaxyname) + '>')
        print('--------------------------')
        print('SUCCESSFUL GET: ' + show(self.url))
        print('--------------------------')
        print('[Cross-Identifications]')
        for item in self.CrossIdent:
            print('- ' + show(item))
        print('')
        print('[Coordinates for Preferred Position]')
        print('> Equatorial (J2000)')
        print('RA: ' + show(self.RA))
        print('Dec: ' + show(self.Dec))
        print('RA(in Deg): ' + show(self.RA_DEG))
        print('Dec(in Deg): ' + show(self.Dec_DEG))
        print('')
        print('[Preferred Redshift & Derived Quantities] - H0 = 67.8 km/s/Mpc')
        print('z: ' + self._show_pair(self.Z))
        print('V(Helio, km/s): ' + self._show_pair(self.VHelio))
        print('V(CMB, km/s): ' + self._show_pair(self.VCMB))
        print('')
        print('[Classifications]')
        print('Object Type: ' + show(self.ObjectType))
        print('Morphology: ' + show(self.Morphoogy))
        print('Other: ' + show(self.OtherClassification))
        print('')
        print('[Angular & Physical Diameters]')
        print('Passband: ' + show(self.Passband))
        print('Diameter(kpc): ' + show(self.Diameter))
        print('')
        print('[Foreground Galatic Extinction]')
        print('A_lambda [mag] V: ' + show(self.A_lambdaV))
        print('A_lambda [mag] K: ' + show(self.A_lambdaK))

    def returnDataList(self):
        """Return the scraped fields as one list, in a fixed order.

        ``OtherClassification`` is not part of the returned list. The list
        fields are returned as the instance's own list objects, not copies.
        """
        return [
            self.CrossIdent, self.RA, self.Dec, self.RA_DEG, self.Dec_DEG,
            self.Z, self.VHelio, self.VCMB, self.HubbleDistance,
            self.ObjectType, self.Morphoogy, self.ActivityType, self.Passband,
            self.Diameter, self.A_lambdaV, self.A_lambdaK
        ]

    # ------------------------------------------------------------------
    # Scraping helpers
    # ------------------------------------------------------------------
    @classmethod
    def _is_tag(cls, node):
        """Tell whether ``node`` has exactly the accepted element type."""
        return str(type(node)) == cls._TAG_TYPE

    def _row(self, row_class):
        """Return the 'tr' element of the page carrying ``row_class``."""
        return self.webParser.getItemByClass(self.bsobj, 'tr', row_class)

    def _cells(self, row_class):
        """Return the 'td' elements of the row carrying ``row_class``."""
        return self.webParser.getItemsByTag(self._row(row_class), 'td')

    def _store_span_pair(self, cell, first_attr, second_attr):
        """Store the text of the first two spans of ``cell`` on two attributes."""
        spans = self.webParser.getItemsByTag(cell, 'span')
        for index, attr in enumerate((first_attr, second_attr)):
            if self._is_tag(spans[index]):
                setattr(self, attr, spans[index].text.strip())

    def _append_span_pair(self, cell, target):
        """Append the text of the first two spans of ``cell`` to ``target``."""
        spans = self.webParser.getItemsByTag(cell, 'span')
        for index in (0, 1):
            if self._is_tag(spans[index]):
                target.append(spans[index].text.strip())

    def _store_first_span(self, cell, attr):
        """Store the text of the span found in ``cell`` on ``attr``."""
        span = self.webParser.getItemByTag(cell, 'span')
        if self._is_tag(span):
            setattr(self, attr, span.text.strip())

    def getData(self):
        """Scrape the downloaded page into the instance attributes.

        Results are appended to the list fields, so calling this again on
        the same instance adds the values a second time.
        """
        # Cross identifications: every span of the row is taken as-is.
        spans = self.webParser.getItemsByTag(
            self._row('ov_insiderow ov_inside_crosid_row'), 'span')
        self.CrossIdent.extend(item.text.strip() for item in spans)

        # Coordinates: first cell is RA/Dec, second is the same in degrees.
        cells = self._cells('ov_insiderow ov_inside_coord_row')
        self._store_span_pair(cells[0], 'RA', 'Dec')
        self._store_span_pair(cells[1], 'RA_DEG', 'Dec_DEG')

        # Redshift row: each used cell yields a two-entry list.
        cells = self._cells('ov_insiderow ov_inside_redshift_row')
        self._append_span_pair(cells[0], self.Z)
        self._append_span_pair(cells[1], self.VHelio)
        self._append_span_pair(cells[3], self.VCMB)
        self._append_span_pair(cells[4], self.HubbleDistance)

        # Classification row.
        cells = self._cells('ov_insiderow ov_inside_classification_row')
        self._store_first_span(cells[0], 'ObjectType')
        self._store_first_span(cells[1], 'Morphoogy')
        self._store_first_span(cells[3], 'ActivityType')
        self._store_first_span(cells[5], 'OtherClassification')

        # Sizes row; both A_lambda values are read from this row as well.
        cells = self._cells('ov_insiderow ov_inside_sizes_row')
        self._store_first_span(cells[0], 'Passband')
        self._store_first_span(cells[3], 'Diameter')
        self._store_first_span(cells[4], 'A_lambdaV')
        self._store_first_span(cells[5], 'A_lambdaK')
def __init__(self, product_class):
    """Delegate to ``WebParser.__init__`` with the fixed argument "newegg".

    Args:
        product_class: Forwarded unchanged as the last argument.
    """
    source_name = "newegg"
    WebParser.__init__(self, source_name, product_class)
def ratemapper(x):
    """Normalise a single rating value.

    Args:
        x: A float rating, or any other value to be parsed.

    Returns:
        * for a non-float ``x``: whatever ``WebParser().parse`` returns when
          called with ``x`` and ``WebParser.PSTYPE_RATINGS``;
        * 0 for a float below ``Ratings.NEUTRAL``;
        * ``x`` unchanged for any other float (this includes NaN, for which
          the comparison is false).
    """
    # isinstance rather than an exact type match, so float subclasses are
    # handled as numbers instead of being sent to the parser.
    if not isinstance(x, float):
        return WebParser().parse(x, WebParser.PSTYPE_RATINGS)
    if x < Ratings.NEUTRAL:
        return 0
    return x
def __init__(self, product_class):
    """Delegate to ``WebParser.__init__`` with the fixed argument "360Buy".

    Args:
        product_class: Forwarded unchanged as the last argument.
    """
    source_name = "360Buy"
    WebParser.__init__(self, source_name, product_class)
def __init__(self, product_class):
    """Delegate to ``WebParser.__init__`` with the fixed argument "DangDang".

    Args:
        product_class: Forwarded unchanged as the last argument.
    """
    source_name = "DangDang"
    WebParser.__init__(self, source_name, product_class)