Esempio n. 1
0
    def process_item(self, item, spider):
        """Insert a scraped play into the database unless it is a duplicate.

        Steps, in order:
          1. Log the item being processed.
          2. Debug aid: when ``settings.DROPCOUNT_CONTROL`` is non-zero and
             ``self.drops.in_row`` has reached that limit, print a banner and
             signal the crawler to shut down. The item is then returned
             without being checked or inserted.
          3. Ask ``self.check_duplicate`` about the item, passing
             ``['playid', 'station']`` (presumably the columns that identify
             a play -- confirm against ``check_duplicate``). On a hit, log
             and print the drop and raise ``DropItem``.
          4. Otherwise log, call ``self.insert_item`` and return the item.

        ``spider`` is accepted but not used here.

        Raises:
            DropItem: when ``check_duplicate`` reports the item as existing.
        """
        # Shared tail of the three log messages.
        song_fmt = "\"%s - %s\"(playid: %s, playdatetime: %s)."

        def song_fields():
            # Read from the item at call time so every message shows the
            # item exactly as it is at that point.
            return (item['artist'], item['songtitle'], item['playid'], item['playdatetime'])

        log.msg("Processing " + (song_fmt % song_fields()), level=log.DEBUG)

        # #DebugTool: Limit number of times an item can be dropped.
        drop_limit = settings.DROPCOUNT_CONTROL
        if drop_limit != 0 and self.drops.in_row > (drop_limit - 1):
            stars = "*************************************"
            for banner_line in (stars, "Drop limit met.  Goodbye.", stars):
                print(banner_line)
            crawler._signal_shutdown(9, 0)  # Kills the Spider
            return item

        if self.check_duplicate(self.databaseTable, item, ['playid', 'station']):
            log.msg("Duplicate item found.  Dropping " + (song_fmt % song_fields()), level=log.ERROR)
            print("Duplicate item found.  Dropping \"%s - %s\"(%s)." % (item['artist'], item['songtitle'], self.drops.in_row))
            raise DropItem("Item already exists in db.")

        log.msg("No duplicate found.  Inserting " + (song_fmt % song_fields()), level=log.DEBUG)
        self.insert_item(self.databaseTable, item)
        return item
Esempio n. 2
0
    def __init__(self):
        """Set up the counters and connect to the database.

        Runs once per pipeline instance. Connection parameters are read from
        ``settings`` (``databaseName``, ``databaseUser``, ``databasePswd``,
        ``databaseHost``) and the target table from
        ``settings.KEXPdatabaseTable``.

        On success ``self.conn`` and ``self.cur`` hold the open connection
        and a cursor on it. If psycopg2 reports an error, the failure is
        logged and the crawler is signalled to shut down; ``self.cur`` (and
        possibly ``self.conn``) are then left unset.
        """
        self.drops = counter('Dropped Songs')
        self.inserts = counter('Inserted Songs')
        self.databaseTable = settings.KEXPdatabaseTable

        database_name = settings.databaseName
        try:
            # Keyword arguments instead of a hand-built DSN string: psycopg2
            # does the quoting, so credentials containing quotes or spaces
            # cannot corrupt the connection string.
            self.conn = psycopg2.connect(
                dbname=database_name,
                user=settings.databaseUser,
                password=settings.databasePswd,
                host=settings.databaseHost
            )
            self.cur = self.conn.cursor()
        except psycopg2.Error as exc:
            # Only database errors are handled here; anything else (Ctrl-C,
            # programming mistakes) is allowed to propagate.
            log.msg("Failed to connect to database (%s).  Shutting down spider." % exc, level=log.ERROR)
            crawler._signal_shutdown(9, 0)  # Kills the Spider if connection fails.
        else:
            # Reported only once both the connection and the cursor exist.
            log.msg("Successfully connected to database \"%s\"." % database_name, level=log.INFO)