Ejemplo n.º 1
0
def init_db_and_queue():
    """Populate the module-level database handle and queue channels.

    Sets the globals ``db_parsed_articles``, ``connection``,
    ``consume_channel`` and ``publish_channel``. Takes no arguments and
    returns nothing.
    """
    global db_parsed_articles
    global connection, consume_channel
    global publish_channel

    # Database handle for parsed articles.
    db_parsed_articles = mongo.get_parsed_articles()

    # RabbitMQ: init_connection() yields the connection together with a
    # first channel, which is handed to the scanned-files initialiser.
    connection, consume_channel = queue.init_connection()
    queue.init_scanned_files(consume_channel)

    # A second channel, opened on the same connection, is handed to the
    # parsed-articles initialiser.
    publish_channel = connection.channel()
    queue.init_parsed_articles(publish_channel, True)
Ejemplo n.º 2
0
def scan_directory(directory):
    """Recursively scan the given directory for new articles.

    Walks ``directory`` top-down, visiting directories and files in
    alphabetical order and skipping any ``.svn`` directory. Every file
    whose full path matches ``scan_match_pattern`` is passed to
    ``add_scanned_file_to_queue``. A progress line is printed every
    ``updt_freq`` queued files and a summary is printed at the end.

    The queue connection is closed even when the scan raises part-way
    through; the exception is then propagated to the caller.
    """
    # Single-argument parenthesised print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print("  Starting scan...")
    print("  Directory: %s" % directory)

    files_queued = 0

    # Initialize a connection and channel to RabbitMQ
    connection, channel = queue.init_connection()
    try:
        queue.init_scanned_files(channel, True)

        for root, dirs, files in os.walk(directory):
            # Ignore any svn dirs. os.walk (top-down) only descends into
            # what is left in ``dirs``, so removing in place prunes the walk.
            if '.svn' in dirs:
                dirs.remove('.svn')

            # Walk dirs and files in alphabetical order
            dirs.sort()
            files.sort()

            for name in files:
                filename = os.path.join(root, name)

                # Only queue files that match the pattern
                if not re.match(scan_match_pattern, filename):
                    continue

                add_scanned_file_to_queue(channel, filename)
                files_queued += 1

                # Update status...
                if files_queued % updt_freq == 0:
                    print("    * Files Queued: %d..." % files_queued)
    finally:
        # Always release the connection, including on error.
        queue.close_connection(connection)

    print("")
    print("  ... scan complete")
    print("    Files Queued: %d" % files_queued)
    print("")