def main():
    """Process a batch of unprocessed records with a pool of worker threads.

    Command line: ``<count> [regex]``.

    ``count`` is converted with ``int`` and passed, together with the
    optional ``regex`` argument (``None`` when absent), to
    ``DataSpyPy.get_unprocessed_records``. Every record returned is put on a
    queue that is shared with ``configs['threads']`` ``DocumentProcessor``
    instances, each started as a daemon. After ``queue.join()`` returns, the
    elapsed wall-clock time is printed.

    Exits with status 1, after printing the usage message, when the count is
    missing or is not a valid integer.
    """
    usage = ('Please, specify a number of records to process and regular '
             'expression (optional)!')
    start = time.time()

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Validate the count before anything is loaded or any thread is started,
    # so a mistyped argument yields the usage message instead of a traceback.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)

    # Use the pattern whenever one is supplied; additional trailing arguments
    # must not cause it to be silently ignored.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')
    dataspypy = DataSpyPy(configs['host'], configs['port'])
    procspypy = ProcSpyPy(dataspypy,
                          configs['google_analytics'],
                          configs['google_adsense'])

    queue = Queue.Queue()
    for _ in range(configs['threads']):
        dp = DocumentProcessor(queue, procspypy)
        # Marked as daemon before start so the workers do not keep the
        # process alive once main() returns.
        dp.setDaemon(True)
        dp.start()

    for record in dataspypy.get_unprocessed_records(limit, pattern):
        queue.put(record)

    # Blocks until every queued item has been marked done -- presumably by
    # the DocumentProcessor workers (their code is not visible here).
    queue.join()
    print("Elapsed Time: %s" % (time.time() - start))
def main():
    """Insert a blank, unprocessed record for each domain read from a file.

    Command line: ``<text file>``. The path is handed to
    ``IoSpyPy.file_get_contents`` and the result is iterated (presumably one
    domain per item -- confirm against IoSpyPy). For every item a record with
    empty metadata fields and ``processed`` set to 0 is passed to
    ``DataSpyPy.insert_record``.

    Exits with status 1, after printing a hint, when no file is given.
    """
    args = sys.argv
    if len(args) < 2:
        print('Please, specify text file!')
        sys.exit(1)

    source_path = args[1]
    domain_list = IoSpyPy.file_get_contents(source_path)

    settings = ConfSpyPy.load('spypy.cfg')
    store = DataSpyPy(settings['host'], settings['port'])

    for name in domain_list:
        # A fresh dict (and fresh list/dict values) is built per domain, with
        # the creation time taken at the moment of insertion.
        record = {
            'date': datetime.utcnow(),
            'domain': name,
            'ip': '',
            'url': '',
            'title': '',
            'description': '',
            'keywords': [],
            'analytics': '',
            'adsense': '',
            'server': '',
            'hfields': {},
            'processed': 0,
        }
        print('Inserting domain: %s' % name)
        store.insert_record(record)

    print('Done')
def main():
    """Process a batch of unprocessed records with a pool of worker threads.

    Command line: ``<count> [regex]``.

    ``count`` is converted with ``int`` and passed, together with the
    optional ``regex`` argument (``None`` when absent), to
    ``DataSpyPy.get_unprocessed_records``. Every record returned is put on a
    queue that is shared with ``configs['threads']`` ``DocumentProcessor``
    instances, each started as a daemon. After ``queue.join()`` returns, the
    elapsed wall-clock time is printed.

    Exits with status 1, after printing the usage message, when the count is
    missing or is not a valid integer.
    """
    usage = ('Please, specify a number of records to process and regular '
             'expression (optional)!')
    start = time.time()

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Parse the count first: failing here is cheap, whereas failing after the
    # workers are running would end the run with a bare ValueError traceback.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)

    # Any third argument is the pattern, even if more arguments follow it;
    # requiring exactly three argv entries would drop it without warning.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')
    dataspypy = DataSpyPy(configs['host'], configs['port'])
    procspypy = ProcSpyPy(dataspypy,
                          configs['google_analytics'],
                          configs['google_adsense'])

    queue = Queue.Queue()
    for _ in range(configs['threads']):
        dp = DocumentProcessor(queue, procspypy)
        # Daemon flag is set before start so the workers cannot keep the
        # interpreter alive after main() finishes.
        dp.setDaemon(True)
        dp.start()

    for record in dataspypy.get_unprocessed_records(limit, pattern):
        queue.put(record)

    # Waits for every queued item to be marked done -- presumably by the
    # DocumentProcessor workers (their code is not visible here).
    queue.join()
    print("Elapsed Time: %s" % (time.time() - start))
def main(urls):
    """Run the processor over ``urls`` using settings from ``spypy.cfg``.

    Loads the configuration, builds a ``DataSpyPy`` from its ``host`` and
    ``port`` entries, wraps it in a ``ProcSpyPy`` together with the
    ``google_analytics`` and ``google_adsense`` entries, and hands ``urls``
    to ``ProcSpyPy.process_urls``.

    Args:
        urls: Passed through unchanged to ``process_urls``.

    Returns:
        None; the result of ``process_urls`` is not propagated.
    """
    settings = ConfSpyPy.load('spypy.cfg')

    host, port = settings['host'], settings['port']
    store = DataSpyPy(host, port)

    processor = ProcSpyPy(
        store,
        settings['google_analytics'],
        settings['google_adsense'],
    )
    processor.process_urls(urls)
def callback(ch, method, properties, body):
    """Process one delivered message and acknowledge it on success.

    NOTE(review): the ``(ch, method, properties, body)`` signature looks like
    a pika consumer callback -- confirm against the code that registers it.

    The configuration is reloaded and new ``DataSpyPy``/``ProcSpyPy`` objects
    are built on every call. ``body`` is decoded with ``loads`` and given to
    ``ProcSpyPy.process_document``; only after that returns is the message
    acknowledged via ``ch.basic_ack``.

    Any ``Exception`` raised along the way is printed and swallowed, in which
    case this function does not acknowledge the message.

    Args:
        ch: Object providing ``basic_ack``.
        method: Object providing ``delivery_tag``.
        properties: Unused.
        body: Serialized document, decoded with ``loads``.
    """
    try:
        settings = ConfSpyPy.load('spypy.cfg')
        store = DataSpyPy(settings['host'], settings['port'])
        processor = ProcSpyPy(
            store,
            settings['google_analytics'],
            settings['google_adsense'],
        )
        processor.process_document(loads(body))
        # Reached only when processing raised nothing.
        tag = method.delivery_tag
        ch.basic_ack(delivery_tag=tag)
    except Exception as error:
        # Best effort: report the failure and return without acknowledging.
        print(error)
def main():
    """Publish unprocessed records to the configured message queue.

    Command line: ``<count> [regex]``.

    ``count`` is converted with ``int`` and passed, together with the
    optional ``regex`` argument (``None`` when absent), to
    ``DataSpyPy.get_unprocessed_records``. Each record is serialized with
    ``dumps`` and published on the default exchange with
    ``configs['queue']`` as routing key, over a blocking pika connection to
    ``localhost``. The queue is declared non-durable.

    Exits with status 1, after printing the usage message, when the count is
    missing or is not a valid integer.
    """
    usage = ('Please, specify a number of records to enqueue and regular '
             'expression (optional)!')

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Validate the count before opening any connection, so a mistyped
    # argument yields the usage message instead of a traceback.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)

    # Use the pattern whenever one is supplied; additional trailing arguments
    # must not cause it to be silently ignored.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=configs['queue'], durable=False)

        dataspypy = DataSpyPy(configs['host'], configs['port'])
        for record in dataspypy.get_unprocessed_records(limit, pattern):
            print('Enqueuing %s' % record['domain'])
            channel.basic_publish(exchange='',
                                  routing_key=configs['queue'],
                                  body=dumps(record))
    finally:
        # Close the connection even if the fetch, serialization or publish
        # step raises; otherwise it would be left open on error.
        connection.close()
def main():
    """Publish unprocessed records to the configured message queue.

    Command line: ``<count> [regex]``.

    ``count`` is converted with ``int`` and passed, together with the
    optional ``regex`` argument (``None`` when absent), to
    ``DataSpyPy.get_unprocessed_records``. Each record is serialized with
    ``dumps`` and published on the default exchange with
    ``configs['queue']`` as routing key, over a blocking pika connection to
    ``localhost``. The queue is declared non-durable.

    Exits with status 1, after printing the usage message, when the count is
    missing or is not a valid integer.
    """
    usage = ('Please, specify a number of records to enqueue and regular '
             'expression (optional)!')

    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Parse the count first: failing here costs nothing, whereas failing
    # after the connection is open would leave it dangling.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)

    # Any third argument is the pattern, even if more arguments follow it;
    # requiring exactly three argv entries would drop it without warning.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=configs['queue'], durable=False)

        dataspypy = DataSpyPy(configs['host'], configs['port'])
        for record in dataspypy.get_unprocessed_records(limit, pattern):
            print('Enqueuing %s' % record['domain'])
            channel.basic_publish(exchange='',
                                  routing_key=configs['queue'],
                                  body=dumps(record))
    finally:
        # Guarantees the connection is released on every exit path, including
        # errors raised while fetching, serializing or publishing records.
        connection.close()