Exemple #1
0
def _list_log_files(log_dir):
    """Return the log file paths that ``ls <log_dir>/*`` used to yield.

    Only regular, non-hidden entries directly inside *log_dir* are returned,
    sorted by name (code-point order). Each path is built as
    ``<log_dir>/<name>``, the same text the shell glob produced, so file
    names already stored in the processed-files collection keep matching.
    A missing or unreadable directory is logged and yields an empty list.
    """
    # The shell used to expand ``~`` and ``$VAR`` in the path; keep doing so.
    base = os.path.expandvars(os.path.expanduser(log_dir))
    try:
        names = os.listdir(base)
    except OSError as exc:
        _logger.error('Could not list log directory %s: %s', base, exc)
        return []

    paths = []
    for name in sorted(names):
        # A shell ``*`` never matched dot-files.
        if name.startswith('.'):
            continue
        path = '%s/%s' % (base, name)
        # Sub-directories made ``ls`` print header/blank lines that were then
        # treated as file names; only real files are processed now.
        if os.path.isfile(path):
            paths.append(path)
    return paths


def bulk(collection=None):
    """Process every not-yet-processed log file found in ``LOG_DIR``.

    Each file is first recorded in the processed-files collection and then
    read line by line. Lines for which ``AccessChecker.parsed_access``
    returns a falsy value are skipped; the others are passed to
    ``register_access``. When ``COUNTER_COMPLIANT`` is set, ``ts.add`` is
    called with the access lock id before registering, and accesses for
    which a ``ValueError`` is raised are skipped. After each file,
    ``rq.send(slp=SLEEP)`` is called.

    Args:
        collection: forwarded unchanged to ``AccessChecker`` and ``Local``.
    """
    _logger.info('Running as bulk')

    if COUNTER_COMPLIANT:
        ts = TimedSet(expired=checkdatelock)

    ac = AccessChecker(collection)

    proc_coll = get_proc_collection()
    # The result is not used below; the call itself is kept in case it has
    # side effects. NOTE(review): confirm, and drop it if it has none.
    get_proc_robots_collection()

    for logfile in _list_log_files(LOG_DIR):

        # Skip files that have already been processed.
        if proc_coll.find({'file_name': logfile}).count() > 0:
            _logger.debug('File already processe %s', logfile)
            continue

        # Record the new file in the processed-files collection.
        _logger.info("Processing: %s", logfile)
        proc_coll.insert({'file_name': logfile})

        rq = Local(MONGO_URI, collection)

        with open(logfile, 'rb') as f:
            for line_number, raw_line in enumerate(f, 1):
                # Lazy %-args: the message is only built when DEBUG is on.
                _logger.debug("Reading line %s from file %s",
                              line_number, logfile)
                parsed_line = ac.parsed_access(raw_line)

                if not parsed_line:
                    continue

                if COUNTER_COMPLIANT:
                    # Counter Mode Accesses: PDF accesses use a lock time of
                    # 30, every other access type uses 10.
                    if parsed_line['access_type'] == "PDF":
                        locktime = 30
                    else:
                        locktime = 10
                    try:
                        lockid = '_'.join([parsed_line['ip'],
                                           parsed_line['code'],
                                           parsed_line['script']])
                        ts.add(lockid, parsed_line['iso_datetime'], locktime)
                        register_access(rq, parsed_line)
                    except ValueError:
                        # Presumably raised by ts.add for an access that is
                        # still locked. NOTE(review): register_access sits in
                        # the same try, so its ValueErrors are swallowed too;
                        # confirm that is intended.
                        continue
                else:
                    # SciELO Mode Accesses
                    register_access(rq, parsed_line)

        rq.send(slp=SLEEP)
        del rq
    def test_expiration_custom_timeout(self):
        """Check add / rejected add / later add on a TimedSet item.

        'art1' is added with a timeout argument of 30, a second add must
        raise ``ValueError`` and leave the stored state untouched, and a
        later add must be accepted and replace the stored entry.

        The original assertions were ``assertTrue(ts._items, {...})``; the
        second argument of ``assertTrue`` is the failure message, so they
        only checked that ``_items`` was truthy. The dict literals used
        there ('29/May/2013:00:01:01') are in a different date format from
        the ISO strings passed to ``add``, so they are not asserted here.
        NOTE(review): once the stored representation is confirmed, pin the
        exact expected values with ``assertEqual``.
        """
        ts = TimedSet(expired=checkdatelock)
        ts.add('art1', '2013-05-29T00:01:01', 30)
        # Assumes ``_items`` is a mapping keyed by item id, as the expected
        # literals of the original assertions imply.
        self.assertIn('art1', ts._items)
        locked_state = dict(ts._items)

        # NOTE(review): ':031' is not a well-formed seconds field (probably
        # meant ':31'); confirm the ValueError comes from the lock and not
        # from date parsing.
        with self.assertRaises(ValueError):
            ts.add('art1', '2013-05-29T00:01:031')

        # A rejected add must not modify the stored entries.
        self.assertEqual(dict(ts._items), locked_state)

        ts.add('art1', '2013-05-29T00:01:32')
        # An accepted add must replace the entry stored for the item.
        self.assertNotEqual(ts._items['art1'], locked_state['art1'])