Ejemplo n.º 1
0
		def Closure(r, data):
			"""Response callback: parse ``data`` and forward the parse result.

			When ``r`` is an exception instance, or its ``status`` is not 200,
			the enclosing scope's ``err`` callback (if truthy) is invoked with
			``r`` and nothing else happens. Otherwise ``data`` is fed to a new
			``WebParser`` and ``cb`` receives the parser's ``result`` together
			with the enclosing scope's ``url``.
			"""
			failed = isinstance(r, Exception) or r.status != 200
			if failed:
				# Report the failure only when an error callback was supplied.
				if err:
					err(r)
				return

			parser = WebParser()
			parser.feed(data)
			cb(parser.result, url)
Ejemplo n.º 2
0
 def __init__(self, username, password):
     """ Bring up logging, the reddit session, the database link and the web parser.

     NOTE(review): ``username`` and ``password`` are accepted but never read in this
     constructor; the reddit session is refreshed through ``OAuth2Util`` instead. Confirm
     whether callers still need to pass them.

     If creating the reddit client or refreshing OAuth raises, the error is logged at
     critical level and the process exits with status 1. """
     logging.basicConfig(level=LOG_LEVEL)
     logging.getLogger("requests").setLevel(logging.WARNING)
     self.log = logging.getLogger("versebot")

     user_agent = "VerseBot by /u/mgrieger. GitHub: https://github.com/matthieugrieger/versebot"
     try:
         self.log.info("Connecting to reddit...")
         self.r = praw.Reddit(user_agent=user_agent)
         self.o = OAuth2Util.OAuth2Util(self.r)
         self.o.refresh(force=True)
     except Exception as exc:
         self.log.critical("Exception: %s", exc)
         self.log.critical("Connection to reddit failed. Exiting...")
         exit(1)
     self.log.info("Successfully connected to reddit!")

     # Database connection.
     self.log.info("Connecting to database...")
     database.connect(self.log)
     self.log.info("Successfully connected to database!")

     # The parser's translations are pushed into the translation list table.
     self.parser = WebParser()
     self.log.info("Updating translation list table...")
     database.update_translation_list(self.parser.translations)
     self.log.info("Translation list update successful!")

     self.log.info("Cleaning old user translation entries...")
     database.clean_user_translations()
     self.log.info("User translation cleaning successful!")
Ejemplo n.º 3
0
    def _parsePage(self, node, resp, parseType):
        """Fill ``node`` with what can be parsed out of the response ``resp``.

        Parameters:
            node: object receiving the results through ``setTitle``,
                ``setKeywordStatus``, ``setCrawledStatus``, ``setError`` and
                the ``urlList`` attribute.
            resp: ``None``, a string (the text of an earlier failure), or an
                object exposing the page markup as ``resp.html``.
            parseType: ``0`` to also collect the page's URLs; any other value
                parses only the title and keyword.

        Returns:
            ``resp`` itself (``None`` or the string) when there was no usable
            response, ``0`` after a successful parse, ``1`` when parsing
            raised an exception.

        A ``KeyboardInterrupt`` during parsing terminates the process with
        exit status 1.
        """
        try:
            # No response object at all: record a generic failure title.
            if resp is None:
                node.setTitle("Invalid, broken, or otherwise unreachable URL")
                return resp

            # A string response carries the text of an earlier exception;
            # it becomes the node title. isinstance() also accepts str
            # subclasses, which the previous exact type comparison missed.
            if isinstance(resp, str):
                node.setTitle(resp)
                return resp

            html = resp.html

            # parseType 0 means the caller wants the outgoing URLs as well.
            if parseType == 0:
                node.urlList = WebParser.parseUrls(html)

            # Parse and assign page title
            node.setTitle(WebParser.getPageTitle(html))

            # If a keyword exists, parse for it
            if self.keywordExists:
                keywordStatus = WebParser.parseKeyword(html, self.keyword)
                node.setKeywordStatus(keywordStatus)

            node.setCrawledStatus(True)
            return 0

        except KeyboardInterrupt:
            sys.stderr.write("\nKeyboardInterrupt detected... exiting run.\n")
            sys.exit(1)
        except Exception as exc:
            # Only ordinary errors are handled here, so SystemExit and
            # GeneratorExit propagate instead of being reported as a
            # parse failure.
            if DEBUG:
                pdb.set_trace()
            sys.stderr.write("In parse page: " + str(exc))
            node.setError(str(type(exc)))
            node.setCrawledStatus(True)
            return 1
Ejemplo n.º 4
0
    def __init__(self):
        """ Set up logging, the Reddit session, the database and the web parser.

        If creating the Reddit client or refreshing OAuth raises, the error is
        logged at critical level and the process exits with status 1.
        """
        logging.basicConfig(level=LOG_LEVEL)
        logging.getLogger("requests").setLevel(logging.WARNING)
        self.log = logging.getLogger("versebot")

        # Reddit session
        user_agent = ("VerseBot by Team VerseBot. "
                      "GitHub: https://github.com/Team-VerseBot/versebot")
        try:
            self.log.info("Connecting to Reddit...")
            self.r = praw.Reddit(user_agent=user_agent)
            self.o = OAuth2Util.OAuth2Util(self.r)
            self.o.refresh(force=True)
        except Exception as exc:
            self.log.critical("Exception: %s", exc)
            self.log.critical("Connection to Reddit failed. Exiting...")
            exit(1)
        self.log.info("Successfully connected to reddit!")

        # Database connection
        self.log.info("Connecting to database...")
        database.connect(self.log)
        self.log.info("Successfully connected to database!")

        # The parser's translations are pushed into the translation list table
        self.parser = WebParser()
        self.log.info("Updating translation list table...")
        database.update_trans_list(self.parser.translations)
        self.log.info("Translation list update successful!")

        self.log.info("Cleaning old user translation entries...")
        database.clean_user_trans()
        self.log.info("User translation cleaning successful!")

        # Extra measure to catch any unhandled exceptions
        unhandled.start()
Ejemplo n.º 5
0
 def __init__(self, product_class):
     """Run the ``WebParser`` initialiser with the literal "Amazon" and ``product_class``."""
     WebParser.__init__(self, "Amazon", product_class)
Ejemplo n.º 6
0
class ExDBManager():
    """Download and hold the NED "byname" overview data for one object.

    Constructing an instance builds the lookup URL from the object name,
    fetches the page through the shared ``WebParser`` and fills the
    attributes below from the overview table rows (see ``getData``).
    """

    # Class-level defaults. The list-valued ones are placeholders only:
    # every instance gets its own fresh lists, otherwise all instances would
    # append into the same shared objects.
    galaxyname = None
    url = None
    CrossIdent = []
    RA = None
    Dec = None
    RA_DEG = None
    Dec_DEG = None
    Z = []  # value, error
    VHelio = []  # km/s
    VCMB = []  # km/s
    HubbleDistance = []  # Mpc
    ObjectType = None
    Morphoogy = None  # (sic) spelling kept because it is a public attribute name
    ActivityType = None
    OtherClassification = None
    Passband = None
    Diameter = None  # kpc
    A_lambdaV = None
    A_lambdaK = None

    webParser = WebParser()
    html = None
    bsobj = None

    # repr of the BeautifulSoup Tag type; compared as a string so that this
    # class does not need to import bs4 itself.
    _TAG_TYPE_REPR = "<class 'bs4.element.Tag'>"

    def __init__(self, galaxyname):
        """Fetch the overview page for ``galaxyname`` and parse it.

        ``galaxyname`` is converted with ``str()`` and appended to the NED
        "byname" URL as-is.
        """
        self.galaxyname = galaxyname
        self.url = "https://ned.ipac.caltech.edu/byname?objname=" + str(
            galaxyname)

        # Per-instance containers (see the note on the class attributes).
        self.CrossIdent = []
        self.Z = []
        self.VHelio = []
        self.VCMB = []
        self.HubbleDistance = []

        self.html, self.bsobj = self.webParser.getHtml(self.url)
        self.getData()

    @staticmethod
    def _with_error(values):
        """Format a ``[value, error]`` list as ``'value +/- error'``.

        Missing entries are rendered as ``None`` instead of raising.
        """
        value = values[0] if len(values) > 0 else None
        error = values[1] if len(values) > 1 else None
        return '{} +/- {}'.format(value, error)

    def printData(self):
        """Print the parsed data as a plain-text report on standard output.

        Fields that were not found on the page are printed as ``None``.
        """
        lines = [
            '<Information Table Of {}>'.format(self.galaxyname),
            '--------------------------',
            'SUCCESSFUL GET: {}'.format(self.url),
            '--------------------------',
            '[Cross-Identifications]',
        ]
        lines.extend('- {}'.format(item) for item in self.CrossIdent)
        lines.extend([
            '',
            '[Coordinates for Preferred Position]',
            '> Equatorial (J2000)',
            'RA: {}'.format(self.RA),
            'Dec: {}'.format(self.Dec),
            'RA(in Deg): {}'.format(self.RA_DEG),
            'Dec(in Deg): {}'.format(self.Dec_DEG),
            '',
            '[Preferred Redshift & Derived Quantities] - H0 = 67.8 km/s/Mpc',
            'z: ' + self._with_error(self.Z),
            'V(Helio, km/s): ' + self._with_error(self.VHelio),
            'V(CMB, km/s): ' + self._with_error(self.VCMB),
            '',
            '[Classifications]',
            'Object Type: {}'.format(self.ObjectType),
            'Morphology: {}'.format(self.Morphoogy),
            'Other: {}'.format(self.OtherClassification),
            '',
            '[Angular & Physical Diameters]',
            'Passband: {}'.format(self.Passband),
            'Diameter(kpc): {}'.format(self.Diameter),
            '',
            '[Foreground Galatic Extinction]',
            'A_lambda [mag] V: {}'.format(self.A_lambdaV),
            'A_lambda [mag] K: {}'.format(self.A_lambdaK),
        ])
        for line in lines:
            print(line)

    def returnDataList(self):
        """Return the parsed fields as one list, in a fixed order.

        Order: CrossIdent, RA, Dec, RA_DEG, Dec_DEG, Z, VHelio, VCMB,
        HubbleDistance, ObjectType, Morphoogy, ActivityType, Passband,
        Diameter, A_lambdaV, A_lambdaK. ``OtherClassification`` is not part
        of the list; the layout is kept unchanged for existing callers.
        """
        return [
            self.CrossIdent, self.RA, self.Dec, self.RA_DEG, self.Dec_DEG,
            self.Z, self.VHelio, self.VCMB, self.HubbleDistance,
            self.ObjectType, self.Morphoogy, self.ActivityType, self.Passband,
            self.Diameter, self.A_lambdaV, self.A_lambdaK
        ]

    def _text_if_tag(self, node):
        """Return ``node.text`` stripped when ``node`` is a bs4 Tag, else None."""
        if str(type(node)) == self._TAG_TYPE_REPR:
            return node.text.strip()
        return None

    def _row_cells(self, row_class):
        """Return the ``td`` items of the ``tr`` row carrying ``row_class``."""
        row = self.webParser.getItemByClass(self.bsobj, 'tr', row_class)
        return self.webParser.getItemsByTag(row, 'td')

    def _span_pair(self, cell):
        """Return the texts of the first two ``span`` items inside ``cell``.

        Each entry is None when the corresponding item is not a bs4 Tag.
        """
        spans = self.webParser.getItemsByTag(cell, 'span')
        return self._text_if_tag(spans[0]), self._text_if_tag(spans[1])

    def _span_values(self, cell):
        """Return the non-missing texts of ``_span_pair(cell)`` as a new list."""
        return [text for text in self._span_pair(cell) if text is not None]

    def _single_span(self, cell):
        """Return the text of the ``span`` found in ``cell``, or None."""
        return self._text_if_tag(self.webParser.getItemByTag(cell, 'span'))

    def getData(self):
        """Extract all fields from ``self.bsobj`` into the instance attributes.

        The list-valued attributes are rebuilt on every call, so calling
        this method again does not accumulate duplicate entries.
        """
        # Cross-identifications: one entry per span of the row.
        row = self.webParser.getItemByClass(
            self.bsobj, 'tr', 'ov_insiderow ov_inside_crosid_row')
        self.CrossIdent = [
            item.text.strip()
            for item in self.webParser.getItemsByTag(row, 'span')
        ]

        # Coordinates: first cell holds RA/Dec, second cell the values that
        # printData labels "in Deg".
        cells = self._row_cells('ov_insiderow ov_inside_coord_row')
        self.RA, self.Dec = self._span_pair(cells[0])
        self.RA_DEG, self.Dec_DEG = self._span_pair(cells[1])

        # Redshift row: each used cell yields a [value, error] pair.
        cells = self._row_cells('ov_insiderow ov_inside_redshift_row')
        self.Z = self._span_values(cells[0])
        self.VHelio = self._span_values(cells[1])
        self.VCMB = self._span_values(cells[3])
        self.HubbleDistance = self._span_values(cells[4])

        # Classification row.
        cells = self._row_cells('ov_insiderow ov_inside_classification_row')
        self.ObjectType = self._single_span(cells[0])
        self.Morphoogy = self._single_span(cells[1])
        self.ActivityType = self._single_span(cells[3])
        self.OtherClassification = self._single_span(cells[5])

        # Sizes row; the two A_lambda values are read from this same row.
        cells = self._row_cells('ov_insiderow ov_inside_sizes_row')
        self.Passband = self._single_span(cells[0])
        self.Diameter = self._single_span(cells[3])
        self.A_lambdaV = self._single_span(cells[4])
        self.A_lambdaK = self._single_span(cells[5])
Ejemplo n.º 7
0
 def __init__(self, product_class):
     """Run the ``WebParser`` initialiser with the literal "newegg" and ``product_class``."""
     WebParser.__init__(self, "newegg", product_class)
Ejemplo n.º 8
0
 def ratemapper(x):
     """Map one rating value.

     Exact ``float`` values below ``Ratings.NEUTRAL`` become 0 and all other
     floats are returned unchanged. Anything whose type is not exactly
     ``float`` is handed to ``WebParser().parse`` with
     ``WebParser.PSTYPE_RATINGS`` and that result is returned.
     """
     # Exact type check on purpose: float subclasses take the parser route.
     if type(x) is float:
         return 0 if x < Ratings.NEUTRAL else x
     return WebParser().parse(x, WebParser.PSTYPE_RATINGS)
Ejemplo n.º 9
0
 def __init__(self, product_class):
     """Run the ``WebParser`` initialiser with the literal "360Buy" and ``product_class``."""
     WebParser.__init__(self, "360Buy", product_class)
Ejemplo n.º 10
0
 def __init__(self, product_class):
     """Run the ``WebParser`` initialiser with the literal "DangDang" and ``product_class``."""
     WebParser.__init__(self, "DangDang", product_class)