Exemple #1
0
def main():
    """Run a batch of unprocessed records through DocumentProcessor workers.

    Command line: ``<count> [regex]``.  ``count`` is the number of records
    requested from ``DataSpyPy.get_unprocessed_records``; the optional
    ``regex`` is passed through as its second argument (``None`` when it is
    omitted).

    Prints the usage message and exits with status 1 when ``count`` is
    missing or is not an integer.  After every queued record has been
    handled, prints the elapsed wall-clock time.
    """
    start = time.time()
    usage = ('Please, specify a number of records to process '
             'and regular expression (optional)!')

    # NOTE: single-argument print(...) behaves the same under the Python 2
    # print statement and the Python 3 print function.
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Validate the arguments before loading the config, opening the data
    # store or starting workers, so bad input fails fast and cleanly.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)
    # Honour the pattern even when extra trailing arguments are present.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')

    dataspypy = DataSpyPy(configs['host'], configs['port'])
    procspypy = ProcSpyPy(dataspypy, configs['google_analytics'], configs['google_adsense'])
    queue = Queue.Queue()

    # Workers are flagged as daemons before being started; the queue.join()
    # below is what waits for the queued work to be finished.
    for _ in range(configs['threads']):
        dp = DocumentProcessor(queue, procspypy)
        dp.setDaemon(True)
        dp.start()

    for record in dataspypy.get_unprocessed_records(limit, pattern):
        queue.put(record)

    queue.join()

    print("Elapsed Time: %s" % (time.time() - start))
Exemple #2
0
def _new_record(domain):
    """Return a fresh, unprocessed record skeleton for *domain*."""
    return {
        'date': datetime.utcnow(),
        'domain': domain,
        'ip': '',
        'url': '',
        'title': '',
        'description': '',
        'keywords': [],
        'analytics': '',
        'adsense': '',
        'server': '',
        'hfields': {},
        'processed': 0
    }


def main():
    """Insert one blank record per domain read from the file in argv[1].

    Prints a usage message and exits with status 1 when no file argument is
    given.  The domains come from ``IoSpyPy.file_get_contents`` and each
    record is stored through ``DataSpyPy.insert_record``.
    """
    if not len(sys.argv) >= 2:
        print('Please, specify text file!')
        sys.exit(1)

    source_path = sys.argv[1]
    domains = IoSpyPy.file_get_contents(source_path)
    settings = ConfSpyPy.load('spypy.cfg')
    dataspypy = DataSpyPy(settings['host'], settings['port'])

    for domain in domains:
        record = _new_record(domain)
        print('Inserting domain: %s' % domain)
        dataspypy.insert_record(record)

    print('Done')
Exemple #3
0
def main():
    """Feed unprocessed records to a pool of DocumentProcessor workers.

    Usage: ``<count> [regex]``.  ``count`` is converted to ``int`` and handed
    to ``DataSpyPy.get_unprocessed_records`` together with the optional
    ``regex`` (``None`` when it is not supplied).

    The usage message is printed and the process exits with status 1 when
    ``count`` is absent or is not an integer.  The elapsed wall-clock time is
    printed once the queue has been joined.
    """
    start = time.time()
    usage = ('Please, specify a number of records to process '
             'and regular expression (optional)!')

    # NOTE: single-argument print(...) behaves the same under the Python 2
    # print statement and the Python 3 print function.
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Reject a non-numeric count here, before the config file, the data
    # store and the worker threads are touched.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)
    # Use the pattern whenever one is present, not only when argv has
    # exactly three entries.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')

    dataspypy = DataSpyPy(configs['host'], configs['port'])
    procspypy = ProcSpyPy(dataspypy, configs['google_analytics'],
                          configs['google_adsense'])
    queue = Queue.Queue()

    # Each worker is marked as a daemon and started; queue.join() further
    # down waits for the queued records to be finished.
    for _ in range(configs['threads']):
        dp = DocumentProcessor(queue, procspypy)
        dp.setDaemon(True)
        dp.start()

    records = dataspypy.get_unprocessed_records(limit, pattern)
    for record in records:
        queue.put(record)

    queue.join()

    print("Elapsed Time: %s" % (time.time() - start))
Exemple #4
0
def main(urls):
    """Process *urls* with a ProcSpyPy configured from ``spypy.cfg``.

    The config supplies the data-store host/port and the two Google
    settings that are passed to ``ProcSpyPy``.
    """
    settings = ConfSpyPy.load('spypy.cfg')
    store = DataSpyPy(settings['host'], settings['port'])

    analytics = settings['google_analytics']
    adsense = settings['google_adsense']
    ProcSpyPy(store, analytics, adsense).process_urls(urls)
Exemple #5
0
def callback(ch, method, properties, body):
    """Handle one delivered message: decode it, process it, acknowledge it.

    The config is loaded and the data store / processor are built on every
    call.  ``body`` is decoded with ``loads`` and passed to
    ``ProcSpyPy.process_document``; the delivery is acknowledged only if
    that succeeds.  Any ``Exception`` (including one raised by the ack
    itself) is printed and swallowed, so a failed message is left
    unacknowledged.
    """
    try:
        settings = ConfSpyPy.load('spypy.cfg')
        store = DataSpyPy(settings['host'], settings['port'])
        processor = ProcSpyPy(
            store,
            settings['google_analytics'],
            settings['google_adsense'],
        )
        handle = processor.process_document
        handle(loads(body))
        tag = method.delivery_tag
        ch.basic_ack(delivery_tag=tag)
    except Exception as error:
        print(error)
Exemple #6
0
def main():
    """Publish unprocessed records to the configured queue via pika.

    Command line: ``<count> [regex]``.  ``count`` is the number of records
    requested from ``DataSpyPy.get_unprocessed_records``; the optional
    ``regex`` is passed through as its second argument (``None`` when it is
    omitted).  Each record is serialised with ``dumps`` and published to the
    default exchange with the queue name from ``spypy.cfg`` as routing key.

    Prints the usage message and exits with status 1 when ``count`` is
    missing or is not an integer.  The broker connection is always closed,
    even when fetching or publishing raises.
    """
    usage = ('Please, specify a number of records to enqueue '
             'and regular expression (optional)!')

    # NOTE: single-argument print(...) behaves the same under the Python 2
    # print statement and the Python 3 print function.
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Validate the arguments before opening any connection so bad input
    # fails fast and leaves nothing to clean up.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)
    # Honour the pattern even when extra trailing arguments are present.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')

    connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=configs['queue'], durable=False)

        dataspypy = DataSpyPy(configs['host'], configs['port'])
        records = dataspypy.get_unprocessed_records(limit, pattern)
        for record in records:
            print('Enqueuing %s' % record['domain'])
            channel.basic_publish(exchange='', routing_key=configs['queue'], body=dumps(record))
    finally:
        # Release the broker connection on success and on failure alike.
        connection.close()
Exemple #7
0
def main():
    """Enqueue unprocessed records on the configured queue via pika.

    Usage: ``<count> [regex]``.  ``count`` is converted to ``int`` and handed
    to ``DataSpyPy.get_unprocessed_records`` together with the optional
    ``regex`` (``None`` when it is not supplied).  Every record is serialised
    with ``dumps`` and published to the default exchange, routed by the
    queue name read from ``spypy.cfg``.

    The usage message is printed and the process exits with status 1 when
    ``count`` is absent or is not an integer.  The broker connection is
    closed in all cases, including when an error interrupts publishing.
    """
    usage = ('Please, specify a number of records to enqueue '
             'and regular expression (optional)!')

    # NOTE: single-argument print(...) behaves the same under the Python 2
    # print statement and the Python 3 print function.
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Reject a non-numeric count before any connection is opened.
    try:
        limit = int(sys.argv[1])
    except ValueError:
        print(usage)
        sys.exit(1)
    # Use the pattern whenever one is present, not only when argv has
    # exactly three entries.
    pattern = sys.argv[2] if len(sys.argv) > 2 else None

    configs = ConfSpyPy.load('spypy.cfg')

    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='localhost'))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=configs['queue'], durable=False)

        dataspypy = DataSpyPy(configs['host'], configs['port'])
        records = dataspypy.get_unprocessed_records(limit, pattern)
        for record in records:
            print('Enqueuing %s' % record['domain'])
            channel.basic_publish(exchange='',
                                  routing_key=configs['queue'],
                                  body=dumps(record))
    finally:
        # Close the connection whether or not publishing succeeded.
        connection.close()