Example #1
    def squeeze(self):
        if (len(self.spongeDatasourcePlugins) > 0): # Check for existence of data source plugins
            fooPlugin = None
            #
            # Here we will define the concept of a publisher.  A publisher is an abstraction for any
            # site, service, or component that handles the publication of data in a way that it can be
            # accessed, consumed, and understood by interested and authorized parties.  Our publisher
            # model is only conceptual at the moment, but we will define the concepts now.  We will not
            # define what ways data can be accessed, consumed, or understood.  We will discuss the concepts
            # for each of these action verbs.
            #
            # Publishers can make data accessible by exposing it over a network, typically published
            # on a web page or via a programmatic web service interface.
            #
            # Publishers can allow data to be consumed, by making it possible for interested parties to
            # export the data as it has been published.  In other words, if I publish a data series, my
            # publisher should not only allow you to view the data, but also pull or export the data
            # as I have published it, and use it for other purposes.
            #
            # Publishers present data in such a way that it can be understood.  The best way to do this
            # is to give the data a graphical representation: a bar graph, line graph, or some other
            # form of graph that best represents your data.  For analytics data, we probably care most
            # about data over time.  If graphics are not practical, then presenting summary data that
            # has been analyzed, with highlights, is a reasonable fallback.
            #
            # Process data sources
            # 0. Set up HTTP Basic Authentication for the timetric web service.
            # 1. Loop through the existing data sources, and attempt to find all of the .csv backing stores.
            # 2. Get the datasource metadata.
            # 3. Push the existing data series stored in the default backing store (currently .csv files)
            #    into a publisher.
            # 4. Email me when the complete set of data series has been uploaded. XXX: TODO: change how
            #    this notification is handled so that it is pluggable.
            #
            wsCaptain = wscaptain.WSCaptain()

            publisherURL = self.spongeProjectEnv['publisher.service.timetric.update.url']
            apitokenKey = self.spongeProjectEnv['publisher.service.timetric.apitoken.key']
            apitokenSecret = self.spongeProjectEnv['publisher.service.timetric.apitoken.secret']
            seriesDict = eval(self.spongeProjectEnv['publisher.service.timetric.series']) # Expected to be a dict literal: datasource key -> list of series IDs

            # XXX: I need to verify if the authentication actually worked and avoid publishing if it did not + report error
            anOpener = wsCaptain.createHTTPBasicAuthenticationOpenerContext(apitokenKey, apitokenSecret, publisherURL)

            for datasourceKey in self.spongeDatasourcePlugins.keys():
                datasource = self.spongeDatasourcePlugins[datasourceKey]
                fooPlugin = new.instance(datasource)
                fooPlugin.__init__(self.spongeProjectEnv)
                # We only want the datasource metadata, not to soak data from the datasources.
                ds_col_labels = fooPlugin.get_datasource_metadata()
                dbname = datasourceKey
                os.chdir(self.spongeProjectEnv['project.db.dir'])

                for col in ds_col_labels.keys(): # XXX What is the best way to iterate over the values?
                    label = ds_col_labels[col][0]
                    dbcsv = dbname + "." + label + ".csv"
                    if dbexists(dbcsv):
                        db = dbopen(dbcsv, flag='c', format='csv')
                        if (db is not None):
                            seriesID = seriesDict[datasourceKey][col-1]
                            seriesURL = publisherURL + "/" + seriesID + "/"
                            print "Publishing " + dbcsv + " to URL = " + seriesURL
                            headers = {'Content-Type':'text/csv'}
                            data = ""
                            for key in db:
                                data = data + key + "," + db[key] + "\r\n"
                            request = wsCaptain.createRequest(seriesURL, data, headers)
                            try:
                                response = urllib2.urlopen(request)
                            except urllib2.HTTPError, e:
                                print e
                            page = wsCaptain.openPage(seriesURL)
                            db.close()
                        else:
                            print "Couldn't open DB name = " + dbcsv
                    else:
                        # Do this if you can't find the actual .csv source file
                        print "Skipping publish of " + dbcsv
            os.chdir(self.baseDir) # Do this to get back to our original working directory
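
The squeeze() example above pulls all of its publisher settings from self.spongeProjectEnv. A minimal sketch of what those entries might look like is shown below; every concrete value (path, URL, tokens, series IDs) is a placeholder, and the shape of the series mapping is inferred only from how the code indexes seriesDict[datasourceKey][col-1].

    # Hypothetical sponge project environment entries assumed by squeeze()
    spongeProjectEnv = {
        'project.db.dir': '/path/to/project/db',                                    # directory holding the .csv backing stores
        'publisher.service.timetric.update.url': 'https://example.invalid/series',  # base update URL (placeholder)
        'publisher.service.timetric.apitoken.key': 'API_TOKEN_KEY',                 # HTTP Basic Auth user (placeholder)
        'publisher.service.timetric.apitoken.secret': 'API_TOKEN_SECRET',           # HTTP Basic Auth password (placeholder)
        # eval()'d into a dict: datasource key -> list of series IDs, one per column
        'publisher.service.timetric.series': "{'GithubDatasourcePlugin': ['series-id-1', 'series-id-2']}",
    }

Since the series property is parsed with eval(), it has to be a valid Python dict literal; ast.literal_eval would be a safer way to parse that kind of plain literal.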
Example #2
    def soak(self): # XXX What should this return?
        if (len(self.spongeDatasourcePlugins) > 0): # Check for existence of data source plugins
            fooPlugin = None
            #
            # Default behavior:
            # Loop through and process all of the data source plugins, and only fail with an
            # exit if there are no plugins available.  Even if a plugin does not work, it should
            # return error info to stderr/stdout, and no results should be committed to the
            # backing store if any plugin fails.
            # XXX: TODO Need to see if I actually honor this
            #
            # Example plugin init:  plugin = GithubDatasourcePlugin()
            #                       self.spongeDatasourcePlugins['GithubDatasourcePlugin'] = plugin
            #
            # Process data sources
            # 1. Get the data source instance
            # 2. Get the results
            # 3. Get the metadata
            # 4. Persist the results into various formats
            for datasourceKey in self.spongeDatasourcePlugins.keys():
                datasource = self.spongeDatasourcePlugins[datasourceKey]
                # print "Processing data source" + datasourceKey + " and datasource = " +
                # fooPlugin = datasource.__init__(self)
                fooPlugin = new.instance(datasource)
                fooPlugin.__init__(self.spongeProjectEnv)
                metadata = fooPlugin.get_plugin_metadata()
                ds_col_labels = fooPlugin.get_datasource_metadata()
                print metadata
                rowResults = fooPlugin.fetch_data(self.spongeDatasourceEnv)
                dbname = datasourceKey

                os.chdir(self.spongeProjectEnv['project.db.dir'])

                #
                # XXX: TODO This section of persistence approaches should be handled by configurable
                # plugins
                # For now, inline each approach
                #
                #
                # import csv
                #
                # Persist Method 1
                # Persist to a .csv with results in row records, human readable
                # This yields one .csv per Plugin
                # This is better for crunching data on a single sheet
                isNewDB = not dbexists(dbname + ".csv")
                fdb = open(dbname + ".csv", 'ab')
                rowdata = None
                if isNewDB:
                    rowdata = "Date"
                    for label in ds_col_labels.values():
                        rowdata = rowdata + ",%s"%(label)
                    fdb.write(rowdata + "\n")
                rowdata = datetime.datetime.now().ctime() # This needs to be ISO
                print "time now is %s"%(rowdata)
                for data in rowResults.values():
                    rowdata = rowdata + ",%s"%(data)
                fdb.write(rowdata + "\n")
                fdb.close()

                #
                # Persist Method 2
                # For each series, put the data into a separate .csv file
                # Format: ISO datetime, row data
                # This yields one .csv per Plugin-Column combo
                # Naming follows this convention: <dbname>.<column key>.csv
                for key,value in rowResults.items():
                    db = dbopen(dbname + "." + key + ".csv", flag='c', format='csv')
                    if (db is not None):
                        # XXX May want to change how this is mapped so that each key/value pair is comma-separated
                        db[datetime.datetime.isoformat(datetime.datetime.now())] = value # Warning: time is in ISO, need to convert when displaying
                        db.close()
                    else:
                        print "Couldn't create or open DB name = " + dbname + "." + key + ".csv"
                print rowResults # XXX Debug
                print fooPlugin # XXX Debug
            os.chdir(self.baseDir) # Do this to get back to our original working directory
        else:
            print "Couldn't load any plugins for datasources, exiting"
            sys.exit(1)
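
Both examples drive their datasource plugins through the same small surface: construction with the project environment, get_plugin_metadata(), get_datasource_metadata(), and fetch_data(). The sketch below is inferred only from those calls; the class name, labels, and return values are made up. Note that squeeze() reads ds_col_labels[col][0] while soak() uses the values directly as header labels, so the exact metadata value shape is ambiguous; the sketch assumes a tuple whose first element is the column label, matching how squeeze() indexes it.

    # Hypothetical plugin shape assumed by soak() and squeeze(); bodies are illustrative only
    class ExampleDatasourcePlugin:
        def __init__(self, spongeProjectEnv):
            self.env = spongeProjectEnv

        def get_plugin_metadata(self):
            # Free-form plugin description; soak() only prints it
            return {'name': 'ExampleDatasourcePlugin', 'version': '0.1'}

        def get_datasource_metadata(self):
            # Column number -> tuple whose first element is the column label
            return {1: ('commits',), 2: ('issues',)}

        def fetch_data(self, spongeDatasourceEnv):
            # Column key -> current value; the keys become the per-series .csv
            # name suffixes in soak()'s second persistence method
            return {'commits': 42, 'issues': 7}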