Beispiel #1
0
    def apiCollectionUpdate(self):
        """Append data points newer than what is already stored.

        For every entry returned by ``apiList.getApiUrls()`` the feed is
        parsed with ``self.parseAPI`` and each row whose ``date`` text
        compares greater than the newest ``date`` stored in the matching
        collection of the ``eia_data`` database is inserted. A collection
        with no stored date receives every row.

        Errors are logged through ``jishinLogging.logger`` and are not
        propagated to the caller; the method always returns ``None``.
        """
        jishinLogging.logger.info("API Update started")

        try:
            # Declare client and database being used
            client = MongoClient()
            db = client.eia_data
        except Exception as dbE:
            jishinLogging.logger.error("Mongo error: %s" % dbE)
            # Without a database handle nothing below can work.
            return

        try:
            # Populate requestURLs object with api urls from file
            requestURLs = apiList.getApiUrls()
        except Exception as apiE:
            jishinLogging.logger.error("Method not found: %s" % apiE)
            # Without the URL list there is nothing to iterate over.
            return

        try:
            # Iterate through list of URLs
            for apiUrl in requestURLs:

                # Parse the current api file
                dataRow = self.parseAPI(apiUrl)

                # Pull name for collection stored in requestURLs object
                collection = db[apiUrl['name']]

                # Ask the server for the document with the greatest date.
                # The value is recomputed for every collection, so an empty
                # collection can never be compared against the previous
                # collection's newest date (or an undefined name).
                newest = collection.find_one(
                    {}, {"date": 1, "_id": 0}, sort=[("date", -1)])
                last_date = newest.get("date") if newest else None

                # Iterate through XML members to populate documents
                for row in dataRow:
                    date = row.find('date').text

                    # No stored date means every row is new.
                    if last_date is None or date > last_date:
                        value = row.find('value').text
                        post = {"date": date, "value": value}
                        collection.insert_one(post)

            jishinLogging.logger.info("API Update completed")

        except urllib2.HTTPError:
            # NOTE(review): presumably raised by self.parseAPI while fetching
            # the feed, in which case apiUrl is the failing entry - confirm.
            jishinLogging.logger.error("Could not download file %s" % apiUrl['url'])
        except Exception as e:
            jishinLogging.logger.error("Exception: %s" % e)
Beispiel #2
0
    def apiCollectionFreshPull(self):
        """Drop the ``eia_data`` database and reload every feed from scratch.

        After the database is dropped, every entry returned by
        ``apiList.getApiUrls()`` is parsed with ``self.parseAPI`` and each
        row is inserted as a ``{"date": ..., "value": ...}`` document into
        the collection named by the entry's ``name`` key.

        Errors are logged through ``jishinLogging.logger`` and are not
        propagated to the caller; the method always returns ``None``. If the
        database cannot be dropped the pull is aborted, so rows are never
        appended on top of data that was meant to be purged.
        """
        jishinLogging.logger.info("API Fresh Pull started")

        try:
            # Declare client and database being used
            client = MongoClient()
            db = client.eia_data
            # Purge database collections. pymongo has no
            # Database.dropDatabase(): that attribute access yields a
            # Collection and calling it raises TypeError, so the drop has to
            # go through the client.
            client.drop_database('eia_data')
        except Exception as dbE:
            jishinLogging.logger.error("Mongo error: %s" % dbE)
            # Do not reload on top of data that could not be purged.
            return

        try:
            # Populate requestURLs object with api urls from file
            requestURLs = apiList.getApiUrls()
        except Exception as apiE:
            jishinLogging.logger.error("Method not found: %s" % apiE)
            # Without the URL list there is nothing to iterate over.
            return

        try:
            # Iterate through list of URLs
            for apiUrl in requestURLs:

                # Parse the current api file
                dataRow = self.parseAPI(apiUrl)

                # Pull name for collection stored in requestURLs object
                collection = db[apiUrl['name']]

                # Iterate through XML members to populate documents
                for row in dataRow:
                    date = row.find('date').text
                    value = row.find('value').text
                    post = {"date": date, "value": value}
                    collection.insert_one(post)

            jishinLogging.logger.info("API Fresh Pull completed")

        except urllib2.HTTPError:
            # The URL must be interpolated into the message: passing it as an
            # extra argument without a %s placeholder makes the log record
            # fail to format.
            jishinLogging.logger.error("Could not download file %s" % apiUrl['url'])
        except Exception as e:
            jishinLogging.logger.error("Exception: %s" % e)
    print("Import local file apiList not found")
try:
    from xml.etree import ElementTree as ET
except ImportError as e:
    print("Import ElementTree not found")

try:
    #Declare client and database being used
    client = MongoClient()
    db = client.eia_data
except Exception as e:
    print "Mongo error ", e

try:
    #Populate requestURLs object with api urls from file
    requestURLs = apiList.getApiUrls()
except Exception:
    print "Method not found"

try:
    #Iterate through list of URLs
    for api_url in requestURLs:
        #Request XML object from API
        tree = ET.parse(urllib2.urlopen(api_url['url']))

        #Parse XML, code would haveto change if XML format changes
        root = tree.getroot()
        series = tree.find("series")
        series_row = series.find("row")
        data = series_row.find("data")
        data_row = data.findall("row")