def __init__(self, address='localhost', exchange='monroeminutes',
             DEBUG=False):
        """Set up the wrapped ``Dispatcher``, the ``Access`` object and an
        empty ``urls`` list.

        ``address``, ``exchange`` and ``DEBUG`` are passed through to
        ``Dispatcher`` as keyword arguments; they are not interpreted here.
        """
        # starts out empty
        self.urls = []

        # collect the pass-through settings, then build the dispatcher
        settings = dict(address=address, exchange=exchange, DEBUG=DEBUG)
        self.dispatcher = Dispatcher(**settings)

        # created after the dispatcher, same order as before
        self.access = Access()
class Dispatch(object):
    """Build one scrape-job package per entity and hand them to a Dispatcher.

    Entities come from ``Access.getentities()``; the resulting list of job
    dictionaries is pushed to the wrapped ``Dispatcher`` via ``seturls``.
    """

    # Values stamped onto every job package built by ``_getentities``.
    MAX_LINK_LEVEL = 4  # if its more than this, we're screwed ...
    DOC_TYPE = 'application/pdf'  # pdf documents
    FREQUENCY_MINUTES = 10080  # in minutes (7 * 24 * 60)
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self,
                 address='localhost',
                 exchange='monroeminutes',
                 DEBUG=False):
        """Create the wrapped ``Dispatcher`` and the ``Access`` object.

        ``address``, ``exchange`` and ``DEBUG`` are forwarded unchanged to
        ``Dispatcher`` as keyword arguments; this class does not interpret
        them.
        """
        self.dispatcher = Dispatcher(address=address,
                                     exchange=exchange,
                                     DEBUG=DEBUG)
        self.access = Access()

        # NOTE(review): no method of this class reads or updates this list
        # after construction -- confirm whether external code relies on it.
        self.urls = []

    def _buildpkg(self, entity):
        """Return the job dictionary for a single entity.

        ``entity`` is indexed with the keys ``'website'``, ``'name'``,
        ``'description'`` and ``'_id'``; a missing key propagates whatever
        error the entity's lookup raises.
        """
        return {
            'targeturl': entity['website'],  # url to scrape
            'title': entity['name'],  # town/village/city name
            'description':
            entity['description'],  # town/village/city description
            'entityid': str(entity['_id']),  # entityid
            'maxlinklevel': self.MAX_LINK_LEVEL,
            # time of package creation; strftime without an explicit time
            # formats the current local time
            'creationdatetime': strftime(self.TIMESTAMP_FORMAT),
            'doctype': self.DOC_TYPE,
            'frequency': self.FREQUENCY_MINUTES,
            'allowdomains': [],  # fresh list per package, never shared
        }

    def _getentities(self):
        """Return a list with one job dictionary per entity.

        The entities are fetched with ``self.access.getentities()``; the
        result is an empty list when that yields nothing.
        """
        return [self._buildpkg(entity)
                for entity in self.access.getentities()]

    def updateurls(self):
        """Rebuild the job list and pass it to ``self.dispatcher.seturls``."""
        self.dispatcher.seturls(self._getentities())

    def start(self):
        """Print a startup banner and start the wrapped dispatcher.

        This method does not call ``updateurls`` itself.
        """
        # The parenthesised single-argument form prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("Starting Dispatcher ...")

        self.dispatcher.start()
# Example #3
0
class Dispatch(object):
    """Turn every entity into a scrape-job package and feed a Dispatcher.

    Entities are read through ``Access.getentities()``; the job packages
    built from them are handed to the wrapped ``Dispatcher`` with
    ``seturls``.
    """

    # Fixed values written into every job package.
    MAX_LINK_LEVEL = 4                     # if its more than this, we're screwed ...
    DOC_TYPE = 'application/pdf'           # pdf documents
    FREQUENCY_MINUTES = 10080              # in minutes (7 * 24 * 60)
    TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, address='localhost', exchange='monroeminutes', DEBUG=False):
        """Create the wrapped ``Dispatcher`` and the ``Access`` object.

        The three arguments are forwarded unchanged, as keyword arguments,
        to ``Dispatcher``; they are not interpreted by this class.
        """
        self.dispatcher = Dispatcher(address=address, exchange=exchange, DEBUG=DEBUG)
        self.access = Access()

        # NOTE(review): never read or updated by the methods of this class --
        # confirm whether anything outside depends on it.
        self.urls = []

    def _getentities(self):
        """Return one job dictionary per entity from ``self.access``.

        Each entity is indexed with ``'website'``, ``'name'``,
        ``'description'`` and ``'_id'``. An empty entity collection gives an
        empty list.
        """
        return [
            {
                'targeturl':         entity['website'],      # url to scrape
                'title':             entity['name'],         # town/village/city name
                'description':       entity['description'],  # town/village/city description
                'entityid':          str(entity['_id']),     # entityid
                'maxlinklevel':      self.MAX_LINK_LEVEL,
                # evaluated per entity: current local time at package creation
                'creationdatetime':  strftime(self.TIMESTAMP_FORMAT),
                'doctype':           self.DOC_TYPE,
                'frequency':         self.FREQUENCY_MINUTES,
                'allowdomains':      [],                     # new list for each package
            }
            for entity in self.access.getentities()
        ]

    def updateurls(self):
        """Rebuild the job list and pass it to ``self.dispatcher.seturls``."""
        self.dispatcher.seturls(self._getentities())

    def start(self):
        """Print a startup banner, then call ``self.dispatcher.start()``.

        ``updateurls`` is not invoked from here.
        """
        # A parenthesised single string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("Starting Dispatcher ...")

        self.dispatcher.start()
# Example #4
0
    def __init__(self, address='localhost', exchange='monroeminutes',
                 DEBUG=False):
        """Set up the wrapped ``Dispatcher``, the ``Access`` object and an
        empty ``urls`` list.

        All three arguments are handed to ``Dispatcher`` as keyword
        arguments and are not interpreted here.
        """
        # starts out empty
        self.urls = []

        # dispatcher first, then the access object -- same order as before
        self.dispatcher = Dispatcher(
            address=address,
            exchange=exchange,
            DEBUG=DEBUG,
        )
        self.access = Access()