Ejemplo n.º 1
0
def set_aliases(path=None):
    """Create the shared HarvestMan objects and register each via SetAlias.

    The objects are built and registered in a fixed order: configuration
    state, logger, data manager, rules checker, network connector,
    URL connector factory, crawler queue and event object.

    Args:
        path: Optional directory added to ``sys.path`` before the
            HarvestMan modules are imported. Ignored when ``None``.
    """
    # Compare against None by identity, and do not pile up duplicate
    # entries when this function is called more than once with the same path.
    if path is not None and path not in sys.path:
        sys.path.append(path)

    # The configuration object is registered before the other modules are
    # even imported.
    # NOTE(review): presumably those modules look up the config alias at
    # import time -- confirm before reordering.
    import config
    SetAlias(config.HarvestManStateObject())

    import datamgr
    import rules
    import connector
    import urlqueue
    import logger
    import event

    SetAlias(logger.HarvestManLogger())

    # Data manager object; it is initialized before being registered.
    dmgr = datamgr.HarvestManDataManager()
    dmgr.initialize()
    SetAlias(dmgr)

    # Rules checker object
    ruleschecker = rules.HarvestManRulesChecker()
    SetAlias(ruleschecker)

    # Connector manager object
    connmgr = connector.HarvestManNetworkConnector()
    SetAlias(connmgr)

    # Connector factory, sized from the ``connections`` setting.
    # NOTE(review): ``objects.config`` is presumably the state object
    # registered above through SetAlias -- verify against SetAlias.
    conn_factory = connector.HarvestManUrlConnectorFactory(
        objects.config.connections)
    SetAlias(conn_factory)

    # Crawler queue object
    queuemgr = urlqueue.HarvestManCrawlerQueue()
    SetAlias(queuemgr)

    SetAlias(event.HarvestManEvent())
Ejemplo n.º 2
0
            self.csslinks.append(url)
            self.links.append(url)

        for item in l3:
            if not item: continue
            url = item[1].replace("'", '').replace('"', '')
            if url not in self.links:
                self.links.append(url)


if __name__ == "__main__":
    import os
    import config
    import logger

    SetAlias(config.HarvestManStateObject())
    SetAlias(logger.HarvestManLogger())

    cfg = objects.config
    cfg.verbosity = 5
    SetLogSeverity()

    cfg.getquerylinks = True

    p = HarvestManSimpleParser()
    #p.enable_feature('option')
    #p = HarvestManSGMLOpParser()

    urls = ['http://projecteuler.net/index.php?section=problems']
    urls = [
        'http://www.evvs.dk/index.php?cPath=30&osCsid=3b110c689f01d722dbbe53c5cee0bf2d'
Ejemplo n.º 3
0
    def __init__(self):

        # Locate HarvestMan modules
        import config

        self.__dict__['cfg'] = config.HarvestManStateObject()