Example #1
0
def main(flist, dbname='ftp_files.db', xname='xapian.db', verbose=False):
    '''
    Main method: dispatches tasks to catalogue and index remote FTP servers.

    flist   -- path of a text file listing one server per line; blank
               lines are ignored
    dbname  -- name handed to Database()
    xname   -- name handed to Indexer(), which is opened with writeable=True
    verbose -- when true, progress messages are printed

    A false return value from enumerate_files() or index_content() is
    reported on stdout and the run carries on. The indexer and the database
    are closed even if an exception interrupts the run.
    '''
    db = Database(dbname)
    try:
        indexer = Indexer(xname, writeable=True)
        try:
            # Read list of remote FTP servers; a blank line would otherwise
            # be scanned as a server with an empty name.
            with open(flist) as f:
                lines = [line.strip() for line in f.read().splitlines()]
            servers = [line for line in lines if line]

            for server in servers:
                # Single-argument print(...) behaves the same under
                # Python 2 and Python 3.
                if verbose:
                    print("Scanning: %s" % server)

                # Record all files on a remote server
                if not enumerate_files(server, db):
                    print("Could not enumerate files on %s" % server)

                # Download text and add to corpus
                if not index_content(server, indexer, db):
                    print("Could not index %s" % server)

            if verbose:
                print("\nCataloguing and indexing complete.")
        finally:
            # cleanup: same order as before (indexer first, then database)
            indexer.close()
    finally:
        db.close()
Example #2
0
def main(search_terms):
    '''
    Search the index for search_terms and print the results.

    search_terms is converted with str() before it is passed to the
    indexer's search(). The first three items of the search result are
    handed to print_results() together with the file database.

    The indexer and the database are closed even if the search or the
    printing raises.
    '''
    dbname = 'ftp_files.db'
    db = Database(dbname)
    try:
        xname = 'xapian.db'
        corpus = Indexer(xname)
        try:
            result = corpus.search(str(search_terms))
            print_results(result[0], result[1], result[2], db)
        finally:
            # clean up: the indexer is closed before the database, as before
            corpus.close()
    finally:
        db.close()
Example #3
0
def main(search_terms):
    '''
    Run a search against the index and print what it finds.

    search_terms is passed through str() and given to the indexer's
    search(); items 0, 1 and 2 of the returned result are forwarded to
    print_results() along with the file database.

    Cleanup runs in a finally clause, so the stores are closed even when
    the search or the printing raises.
    '''
    dbname = 'ftp_files.db'
    xname = 'xapian.db'

    db = Database(dbname)
    corpus = None
    try:
        corpus = Indexer(xname)

        result = corpus.search(str(search_terms))
        print_results(result[0], result[1], result[2], db)
    finally:
        # clean up: corpus stays None if Indexer() itself failed, in which
        # case only the database needs closing.
        if corpus is not None:
            corpus.close()
        db.close()
Example #4
0
def _read_lines(path):
    """Return the non-blank lines of the file at path, whitespace-stripped."""
    with open(path) as f:
        stripped = [line.strip() for line in f.read().splitlines()]
    return [line for line in stripped if line]


def _expand_servers(servers, prefixes):
    """Return every server preceded by its "prefix.server" variants."""
    expanded = []
    for server in servers:
        # Prefixed names come out last-prefix-first: that is the scan order
        # the earlier insert-at-index loop produced, so it is kept.
        expanded.extend(prefix + "." + server for prefix in reversed(prefixes))
        expanded.append(server)
    return expanded


def main(flist, plist="prefix.conf", dbname="ftp_files.db", xname="xapian.db", verbose=False):
    """
    Main method: dispatches tasks to catalogue and index remote FTP servers.

    flist   -- path of a text file listing one server per line
    plist   -- path of a text file listing one host-name prefix per line
    dbname  -- name handed to Database()
    xname   -- name handed to Indexer(), which is opened with writeable=True
    verbose -- when true, progress messages are printed and verbose=True is
               forwarded to enumerate_files() and index_content()

    Blank lines in either list file are ignored. Each server is scanned
    under every "prefix.server" name and then under its own name. Names for
    which is_open_ftp_server() returns false are skipped silently; a false
    return from enumerate_files() is reported and skips indexing for that
    name. The indexer and the database are closed even if an exception
    interrupts the run.
    """
    db = Database(dbname)
    try:
        indexer = Indexer(xname, writeable=True)
        try:
            # Read list of prefixes
            prefixes = _read_lines(plist)

            # Read list of remote FTP servers
            servers = _read_lines(flist)

            # Compile list of all servers
            servers = _expand_servers(servers, prefixes)

            for server in servers:
                # Single-argument print(...) behaves the same under
                # Python 2 and Python 3.
                if verbose:
                    print("Scanning: %s" % server)

                # Determine if server is a valid FTP site
                if not is_open_ftp_server(server):
                    continue

                if verbose:
                    print("\tServer is valid, connecting...")

                # Record all files on a remote server
                if not enumerate_files(server, db, verbose=verbose):
                    print("\tCould not enumerate files on %s" % server)
                    continue

                # Download text and add to corpus
                if not index_content(server, indexer, db, verbose=verbose):
                    print("\tCould not index %s" % server)

            if verbose:
                print("\nCataloguing and indexing complete.")
        finally:
            # cleanup: same order as before (indexer first, then database)
            indexer.close()
    finally:
        db.close()
Example #5
0
def main(flist, plist='prefix.conf', dbname='ftp_files.db', xname='xapian.db', verbose=False):
    '''
    Main method: dispatches tasks to catalogue and index remote FTP servers.

    flist   -- path of a text file listing one server per line
    plist   -- path of a text file listing one host-name prefix per line
    dbname  -- name handed to Database()
    xname   -- name handed to Indexer(), which is opened with writeable=True
    verbose -- when true, progress messages are printed and verbose=True is
               forwarded to enumerate_files() and index_content()

    Blank lines in either list file are ignored. Every server is tried
    under each 'prefix.server' name and then under its plain name. Names
    rejected by is_open_ftp_server() are skipped without a message; when
    enumerate_files() returns false the failure is printed and indexing is
    skipped for that name. Cleanup runs in a finally clause, so the stores
    are closed even if an exception interrupts the run.
    '''
    db = Database(dbname)
    indexer = None
    try:
        indexer = Indexer(xname, writeable=True)

        # Read list of prefixes
        with open(plist) as f:
            prefixes = [p.strip() for p in f.read().splitlines() if p.strip()]

        # Read list of remote FTP servers
        with open(flist) as f:
            hosts = [h.strip() for h in f.read().splitlines() if h.strip()]

        # Compile list of all servers. Built as a new list in one pass; the
        # prefixed names go last-prefix-first ahead of the plain host, which
        # is the order the earlier index()/insert() loop produced.
        servers = []
        for host in hosts:
            servers.extend(prefix + '.' + host for prefix in reversed(prefixes))
            servers.append(host)

        for server in servers:
            # Single-argument print(...) behaves the same under Python 2
            # and Python 3.
            if verbose:
                print("Scanning: %s" % server)

            # Determine if server is a valid FTP site
            if not is_open_ftp_server(server):
                continue

            if verbose:
                print("\tServer is valid, connecting...")

            # Record all files on a remote server
            if not enumerate_files(server, db, verbose=verbose):
                print("\tCould not enumerate files on %s" % server)
                continue

            # Download text and add to corpus
            if not index_content(server, indexer, db, verbose=verbose):
                print("\tCould not index %s" % server)

        if verbose:
            print("\nCataloguing and indexing complete.")
    finally:
        # cleanup: indexer stays None if Indexer() itself failed, in which
        # case only the database needs closing.
        if indexer is not None:
            indexer.close()
        db.close()