コード例 #1
0
ファイル: services.py プロジェクト: nyimbi/Domo
    def __init__(self):
        """Initialise the ObjBase parent, the worker registry and the logger."""
        ObjBase.__init__(self)

        # registry of workers, keyed by name: {name : workerObj}
        self.workers = {}

        # the logger name carries the value returned by getnodename()
        logger_name = 'WorkerService_%s' % getnodename()
        self.logger = get_logger(logger_name)
コード例 #2
0
ファイル: crawler.py プロジェクト: nyimbi/Domo
def create_multi(options):
    """Build a pycurl multi handle together with its pool of easy handles.

    ``options`` is a mapping whose values are lists; only the first element
    of each list is read.  ``maxconnections`` sets the pool size and
    defaults to 5.  Every key of ``settings.CRAWL_OPTIONS`` names a pycurl
    option constant; a non-empty user value stored under the lower-cased
    key overrides the default taken from settings.

    Returns ``(multi, freelist)`` where ``freelist`` is a copy of
    ``multi.handles``, or ``None`` when handles were requested but
    ``settings.CRAWL_OPTIONS`` is missing.
    """
    logger = get_logger('MULTIHANDLER')
    m = pycurl.CurlMulti()
    m.handles = []

    # default number of connections is 5
    # some options are hardcoded, nonprovided options
    # might be problematic when settings.py is used
    connection_count = int(options.get('maxconnections', [5])[0])
    logger.debug('maxconnections %d' % connection_count)

    # TODO: a warning about missing required options should be added here

    # Checked before any handle is created so that no curl handle is
    # allocated and then abandoned; skipped when no handle is requested,
    # in which case an empty pool is returned as before.
    if connection_count > 0 and not hasattr(settings, 'CRAWL_OPTIONS'):
        logger.error('No options for crawler found')
        return None

    for i in range(connection_count):
        c = pycurl.Curl()
        c.name = 'curl%s' % i

        for opt, val in settings.CRAWL_OPTIONS.items():
            # use user provided options or defaults
            optval = val
            provided = options.get(opt.lower())
            if provided and provided[0]:
                raw = provided[0]
                # NOTE(review): `t` is presumably a module-level pattern
                # matching numeric strings -- confirm against its definition
                if t.match(raw):
                    # explicit branch so that a numeric "0" stays the int 0
                    optval = int(raw)
                else:
                    optval = raw

            # apply the resolved value (previously the default was always used)
            c.setopt(getattr(pycurl, opt), optval)
        m.handles.append(c)

    return m, m.handles[:]  # multi, freelist
コード例 #3
0
ファイル: crawler.py プロジェクト: altunyurt/Domo
def create_multi(options):
    """Build a pycurl multi handle together with its pool of easy handles.

    ``options`` is a mapping whose values are lists; only the first element
    of each list is read.  ``maxconnections`` sets the pool size and
    defaults to 5.  Every key of ``settings.CRAWL_OPTIONS`` names a pycurl
    option constant; a non-empty user value stored under the lower-cased
    key overrides the default taken from settings.

    Returns ``(multi, freelist)`` where ``freelist`` is a copy of
    ``multi.handles``, or ``None`` when handles were requested but
    ``settings.CRAWL_OPTIONS`` is missing.
    """
    logger = get_logger("MULTIHANDLER")
    m = pycurl.CurlMulti()
    m.handles = []

    # default number of connections is 5
    # some options are hardcoded, nonprovided options
    # might be problematic when settings.py is used
    connection_count = int(options.get("maxconnections", [5])[0])
    logger.debug("maxconnections %d" % connection_count)

    # TODO: a warning about missing required options should be added here

    # Checked before any handle is created so that no curl handle is
    # allocated and then abandoned; skipped when no handle is requested,
    # in which case an empty pool is returned as before.
    if connection_count > 0 and not hasattr(settings, "CRAWL_OPTIONS"):
        logger.error("No options for crawler found")
        return None

    for i in range(connection_count):
        c = pycurl.Curl()
        c.name = "curl%s" % i

        for opt, val in settings.CRAWL_OPTIONS.items():
            # use user provided options or defaults
            optval = val
            provided = options.get(opt.lower())
            if provided and provided[0]:
                raw = provided[0]
                # NOTE(review): `t` is presumably a module-level pattern
                # matching numeric strings -- confirm against its definition
                if t.match(raw):
                    # explicit branch so that a numeric "0" stays the int 0
                    optval = int(raw)
                else:
                    optval = raw

            # apply the resolved value (previously the default was always used)
            c.setopt(getattr(pycurl, opt), optval)
        m.handles.append(c)

    return m, m.handles[:]  # multi, freelist
コード例 #4
0
ファイル: darc.py プロジェクト: altunyurt/Domo
    def __init__(self, filename):
        """Open the gzip archive *filename* for reading.

        When opening raises IOError the failure is logged and ``self.file``
        is set to None instead of a file object.
        """
        self.logger = get_logger('DArcReader')

        try:
            handle = gzip.open(filename)
        except IOError:
            self.logger.error('File not found: %s' % filename)
            handle = None

        self.file = handle
コード例 #5
0
ファイル: darc.py プロジェクト: altunyurt/Domo
    def __init__(self, filename):
        """Open the temporary gzip output file used while writing an archive.

        ``self.filename`` keeps the part of *filename* before its first dot;
        the output file is named ``<self.filename>_temp``.
        """
        # trim extension: everything from the first dot onwards is dropped
        self.filename = filename.split('.', 1)[0]
        temp_name = '%s_temp' % self.filename
        self.file = gzip.GzipFile(temp_name, 'wb')

        # if redirected, new url should be rewritten according to regex rules
        location_pattern = r'Location: (.*)\r\n'
        self.regx = re.compile(location_pattern)
        self.logger = get_logger('DArcWriter')
コード例 #6
0
ファイル: darc.py プロジェクト: nyimbi/Domo
    def __init__(self, filename):
        """Open the gzip archive *filename* for reading.

        When opening raises IOError the failure is logged and ``self.file``
        is set to None instead of a file object.
        """
        self.logger = get_logger('DArcReader')

        try:
            handle = gzip.open(filename)
        except IOError:
            self.logger.error('File not found: %s' % filename)
            handle = None

        self.file = handle
コード例 #7
0
ファイル: darc.py プロジェクト: nyimbi/Domo
    def __init__(self, filename):
        """Open the temporary gzip output file used while writing an archive.

        ``self.filename`` keeps the part of *filename* before its first dot;
        the output file is named ``<self.filename>_temp``.
        """
        # trim extension: everything from the first dot onwards is dropped
        self.filename = filename.split('.', 1)[0]
        temp_name = '%s_temp' % self.filename
        self.file = gzip.GzipFile(temp_name, 'wb')

        # if redirected, new url should be rewritten according to regex rules
        location_pattern = r'Location: (.*)\r\n'
        self.regx = re.compile(location_pattern)
        self.logger = get_logger('DArcWriter')
コード例 #8
0
ファイル: __init__.py プロジェクト: altunyurt/Domo
    def __init__(self, config, report=None, status=None):
        """Store the configuration and shared objects and create the logger.

        The instance name is the first element of the ``name`` entry found
        under ``config['options']``; the logger is created with that name.
        """
        # TODO: format configuration items, everything is in list form for now
        options = config.get('options')
        name_values = options.get('name')
        self.name = name_values[0]
        self.logger = get_logger(self.name)

        # shared objects
        self.config = config
        self.report = report
        self.status = status
コード例 #9
0
ファイル: __init__.py プロジェクト: nyimbi/Domo
    def __init__(self, config, report=None, status=None):
        """Store the configuration and shared objects and create the logger.

        The instance name is the first element of the ``name`` entry found
        under ``config['options']``; the logger is created with that name.
        """
        # TODO: format configuration items, everything is in list form for now
        options = config.get('options')
        name_values = options.get('name')
        self.name = name_values[0]
        self.logger = get_logger(self.name)

        # shared objects
        self.config = config
        self.report = report
        self.status = status
コード例 #10
0
ファイル: domo_client.py プロジェクト: altunyurt/Domo
def listprofiles(*args):
    """Log the names of all stored profiles, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('ListProfiles')
    session = Session()
    profiles = session.query(Profile).all()

    if profiles:
        names = [profile.name for profile in profiles]
        logger.info('Profiles found: %s' % ', '.join(names))
    else:
        logger.error('No profiles found')

    sys.exit(0)
コード例 #11
0
    def __init__(self, config):
        """Set up the url sets and instantiate the configured plugins.

        ``config`` provides ``options`` (whose ``seeds`` entry is a list
        holding one comma separated string of seed urls) and ``plugins``
        (``{plugin_name: [params]}``).  Plugin classes are looked up in
        ``registry`` and sorted into extractors, filters and transformers
        by the suffix of their name.  A plugin that fails to load is
        logged with its traceback and skipped.
        """
        if not hasattr(self, 'logger'):
            self.logger = get_logger('UrlsContainer')

        self.options = config.get('options')
        self.plugins = config.get('plugins')
        self.done = Set([])
        # TODO: format configuration objects, everything is in list form
        self.queue = Set([
            item.strip().encode('utf-8')
            for item in self.options.get('seeds')[0].split(',')
        ])
        self.temp = Set([])
        self.failed = Set([])
        self.retries = dict()
        self.extractors = []
        self.filters = []
        self.transformers = []

        # plugin name suffix -> list collecting plugins of that kind;
        # checked in this order, names with another suffix are ignored
        buckets = (
            ("Extractor", self.extractors),
            ("Filter", self.filters),
            ("Transform", self.transformers),
        )

        def bucket_for(plugin_name):
            for suffix, bucket in buckets:
                if plugin_name.endswith(suffix):
                    return bucket
            return None

        # TODO: rewrite
        # plugins = {plug: [params], plug2: [params2]}
        # plugins with no params where args_required will be discarded
        for name, parms in self.plugins.items():
            try:
                cls = registry.get(name)
                if cls.opts.get('args_required') and not parms[0]:
                    continue

                bucket = bucket_for(name)
                if bucket is not None:
                    bucket.append(cls(parms))
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))

        # also add enabled but non visible plugins to scene
        for name, cls in registry.items():
            try:
                if cls.enabled and not cls.visible:
                    bucket = bucket_for(name)
                    if bucket is not None:
                        bucket.append(cls())
            except Exception:
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))
コード例 #12
0
ファイル: domo_client.py プロジェクト: altunyurt/Domo
def listnodes(*args):
    """Log the nodes the Pyro name server lists for the domain, then exit 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('Listnodes')

    locator = Pyro.naming.NameServerLocator()
    name_server = locator.getNS()
    nlist = name_server.list(':Default.%s' % domain)

    if nlist:
        # the first item of every listed entry is reported as the node name
        node_names = [node[0] for node in nlist]
        logger.info('Nodes found: %s' % ", ".join(node_names))
    else:
        logger.error('No nodes found')

    sys.exit(0)
コード例 #13
0
ファイル: domo_client.py プロジェクト: nyimbi/Domo
def listprofiles(*args):
    """Log the names of all stored profiles, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('ListProfiles')
    session = Session()
    profiles = session.query(Profile).all()

    if profiles:
        names = [profile.name for profile in profiles]
        logger.info('Profiles found: %s' % ', '.join(names))
    else:
        logger.error('No profiles found')

    sys.exit(0)
コード例 #14
0
ファイル: domo_client.py プロジェクト: nyimbi/Domo
def listnodes(*args):
    """Log the nodes the Pyro name server lists for the domain, then exit 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('Listnodes')

    locator = Pyro.naming.NameServerLocator()
    name_server = locator.getNS()
    nlist = name_server.list(':Default.%s' % domain)

    if nlist:
        # the first item of every listed entry is reported as the node name
        node_names = [node[0] for node in nlist]
        logger.info('Nodes found: %s' % ", ".join(node_names))
    else:
        logger.error('No nodes found')

    sys.exit(0)
コード例 #15
0
ファイル: container.py プロジェクト: altunyurt/Domo
    def __init__(self, config):
        """Set up the url sets and instantiate the configured plugins.

        ``config`` provides ``options`` (whose ``seeds`` entry is a list
        holding one comma separated string of seed urls) and ``plugins``
        (``{plugin_name: [params]}``).  Plugin classes are looked up in
        ``registry`` and sorted into extractors, filters and transformers
        by the suffix of their name.  A plugin that fails to load is
        logged with its traceback and skipped.
        """
        if not hasattr(self, 'logger'):
            self.logger = get_logger('UrlsContainer')

        self.options = config.get('options')
        self.plugins = config.get('plugins')
        self.done = Set([])
        # TODO: format configuration objects, everything is in list form
        self.queue = Set([
            item.strip().encode('utf-8')
            for item in self.options.get('seeds')[0].split(',')
        ])
        self.temp = Set([])
        self.failed = Set([])
        self.retries = dict()
        self.extractors = []
        self.filters = []
        self.transformers = []

        # plugin name suffix -> list collecting plugins of that kind;
        # checked in this order, names with another suffix are ignored
        buckets = (
            ("Extractor", self.extractors),
            ("Filter", self.filters),
            ("Transform", self.transformers),
        )

        def bucket_for(plugin_name):
            for suffix, bucket in buckets:
                if plugin_name.endswith(suffix):
                    return bucket
            return None

        # TODO: rewrite
        # plugins = {plug: [params], plug2: [params2]}
        # plugins with no params where args_required will be discarded
        for name, parms in self.plugins.items():
            try:
                cls = registry.get(name)
                if cls.opts.get('args_required') and not parms[0]:
                    continue

                bucket = bucket_for(name)
                if bucket is not None:
                    bucket.append(cls(parms))
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))

        # also add enabled but non visible plugins to scene
        for name, cls in registry.items():
            try:
                if cls.enabled and not cls.visible:
                    bucket = bucket_for(name)
                    if bucket is not None:
                        bucket.append(cls())
            except Exception:
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))
コード例 #16
0
    def __init__(self, *args, **kwargs):
        """Create the plugin logger and record the plugin arguments.

        When ``self.args_required`` is true and no positional argument is
        given, the plugin logs the fact and sets ``self.enabled`` to False.
        Otherwise ``self.args`` receives a shallow copy of the argument.
        ``self.args`` is always defined afterwards; it is an empty list
        when there are no arguments.  Keyword arguments are accepted but
        not used here.
        """
        self.logger = get_logger(self.__class__.__name__)

        # default first, so that a plugin which disables itself below
        # still has an ``args`` attribute
        self.args = []

        if self.args_required:
            if not args:
                self.logger.info('%s requires arguments, but none given. therefore this module is disabled' % 
                        self.__class__.__name__)
                self.enabled = False
            else:
                self.args = copy(*args)                 # always in ['blah blah'] form
コード例 #17
0
ファイル: __pluginmeta.py プロジェクト: altunyurt/Domo
    def __init__(self, *args, **kwargs):
        """Create the plugin logger and record the plugin arguments.

        When ``self.args_required`` is true and no positional argument is
        given, the plugin logs the fact and sets ``self.enabled`` to False.
        Otherwise ``self.args`` receives a shallow copy of the argument.
        ``self.args`` is always defined afterwards; it is an empty list
        when there are no arguments.  Keyword arguments are accepted but
        not used here.
        """
        self.logger = get_logger(self.__class__.__name__)

        # default first, so that a plugin which disables itself below
        # still has an ``args`` attribute
        self.args = []

        if self.args_required:
            if not args:
                self.logger.info(
                    "%s requires arguments, but none given. therefore this module is disabled" % self.__class__.__name__
                )
                self.enabled = False
            else:
                self.args = copy(*args)  # always in ['blah blah'] form
コード例 #18
0
ファイル: domo_client.py プロジェクト: nyimbi/Domo
def runcommand(options, parser):
    logger = get_logger('RunCommand')

    if not options.command or not options.node:
        return parser.print_help()

    #ns = Pyro.naming.NameServerLocator().getNS()
    #uri = ns.resolve('%s.%s.jobservice' % (domain, options.node))
    #js = Pyro.core.getProxyForURI(uri)
    js = Pyro.core.getProxyForURI("PYROLOC://localhost:7766/jobservice")

    if options.command == 'create':
        if not options.profile:
            return logger.error(
                'crate should be called with profile name: -n nodename -c create -p profile'
            )

        sess = Session()
        try:

            pf = sess.query(Profile).filter(
                profile.c.name == options.profile).first()
            status, workername = js.create(pf.configuration)

        except Exception, e:
            logger.error('No profile found with name: %s' % options.profile)
            logger.error("".join(getPyroTraceback(e)))
            return -1

        if status:
            j = Job(workername, 'paused')
            pf.jobs.append(j)
            #sess.save(j)
            sess.add(j)
            sess.commit()
            sess.close()
            logger.info("worker with name %s created" % workername)
            return 0

        logger.error('no worker created: %s' % workername)
        return -1
コード例 #19
0
ファイル: domo_client.py プロジェクト: altunyurt/Domo
def runcommand(options, parser):
    logger = get_logger('RunCommand')

    if not options.command or not options.node:
        return parser.print_help()

    #ns = Pyro.naming.NameServerLocator().getNS()
    #uri = ns.resolve('%s.%s.jobservice' % (domain, options.node))
    #js = Pyro.core.getProxyForURI(uri)
    js = Pyro.core.getProxyForURI("PYROLOC://localhost:7766/jobservice")

    if options.command == 'create':
        if not options.profile:
            return logger.error('crate should be called with profile name: -n nodename -c create -p profile')
            
        sess = Session()
        try:
            
            pf = sess.query(Profile).filter(profile.c.name==options.profile).first()
            status, workername = js.create(pf.configuration)

        except Exception, e:    
            logger.error( 'No profile found with name: %s' % options.profile)
            logger.error("".join(getPyroTraceback(e)))
            return -1

        if status: 
            j = Job(workername, 'paused')
            pf.jobs.append(j)
            #sess.save(j)
            sess.add(j)
            sess.commit()
            sess.close()
            logger.info ("worker with name %s created" % workername)
            return 0

        logger.error('no worker created: %s' % workername)
        return -1
コード例 #20
0
ファイル: darc.py プロジェクト: nyimbi/Domo
def indexer(filename, max_count=100):
    """Store one Index row per chunk of the archive *filename*.

    Each row records the file name, the timestamp taken from the second
    ``_`` separated field of the file name, the first item of the chunk
    and the reader offset at which the chunk starts.  Rows are committed
    in batches of ``max_count``; the remainder is committed at the end.

    The reader and the current session are closed even when reading or
    committing raises.
    """
    logger = get_logger('Indexer')

    count = offset = 0
    sess = Session()
    d = DArcReader('%s' % filename)
    logger.debug('Archive file %s opened with pointer %s' % (filename, d))

    try:
        timestamp = filename.split('_')[1]  # always profilename_timestamp_node.arc

        for chunk in d.chunks():
            # url : offset
            count += 1

            index = Index(filename, timestamp, chunk[0], offset)
            #sess.save(index)
            sess.add(index)
            if count % max_count == 0:
                # commit the batch and release its session before a new
                # one is started (the old one used to be left open)
                sess.commit()
                sess.close()
                sess = Session()

            offset = d.tell()
        sess.commit()
    finally:
        # nested so the reader is closed even if closing the session fails
        try:
            sess.close()
        finally:
            d.close()
コード例 #21
0
ファイル: darc.py プロジェクト: altunyurt/Domo
def indexer(filename, max_count=100):
    """Store one Index row per chunk of the archive *filename*.

    Each row records the file name, the timestamp taken from the second
    ``_`` separated field of the file name, the first item of the chunk
    and the reader offset at which the chunk starts.  Rows are committed
    in batches of ``max_count``; the remainder is committed at the end.

    The reader and the current session are closed even when reading or
    committing raises.
    """
    logger = get_logger('Indexer')

    count = offset = 0
    sess = Session()
    d = DArcReader('%s' % filename)
    logger.debug('Archive file %s opened with pointer %s' % (filename, d))

    try:
        timestamp = filename.split('_')[1] # always profilename_timestamp_node.arc

        for chunk in d.chunks():
            # url : offset
            count += 1

            index = Index(filename, timestamp, chunk[0], offset)
            #sess.save(index)
            sess.add(index)
            if count % max_count == 0:
                # commit the batch and release its session before a new
                # one is started (the old one used to be left open)
                sess.commit()
                sess.close()
                sess = Session()

            offset = d.tell()
        sess.commit()
    finally:
        # nested so the reader is closed even if closing the session fails
        try:
            sess.close()
        finally:
            d.close()
コード例 #22
0
ファイル: domo_cron.py プロジェクト: altunyurt/Domo
from Pyro.util import getPyroTraceback
from datetime import datetime
from domo import settings
from domo.interfaces.db import Site, Job, makelogsession
from optparse import OptionParser
from sqlalchemy.exceptions import InvalidRequestError
from domo.interfaces.logger import get_logger
from datetime import datetime, timedelta
import Pyro.core
import Pyro.naming
import sys
import traceback

# Domain taken from the project settings; listnodes() uses it to build the
# name server group it lists.
domain = settings.DOMAIN
# Logger shared by the functions of this module.
logger = get_logger('Cron service')

def listnodes(ns):
    """Return the node names that *ns* lists for the configured domain.

    ``ns`` must provide ``list(group)``; the first item of every returned
    entry is taken as the node name.  When nothing is listed an error is
    logged and None is returned.
    """
    group = ':Default.%s' % domain
    nlist = ns.list(group)

    if nlist:
        return [node[0] for node in nlist]

    logger.error('No nodes found, exiting')
    return None

def listsites():
    session = makelogsession()()
    sites = session.query(Site).all()
    
    if not sites:
        logger.error('No sites found, exiting')
コード例 #23
0
from Pyro.util import getPyroTraceback
from datetime import datetime
from domo import settings
from domo.interfaces.db import Site, Job, makelogsession
from optparse import OptionParser
from sqlalchemy.exceptions import InvalidRequestError
from domo.interfaces.logger import get_logger
from datetime import datetime, timedelta
import Pyro.core
import Pyro.naming
import sys
import traceback

# Domain taken from the project settings; listnodes() uses it to build the
# name server group it lists.
domain = settings.DOMAIN
# Logger shared by the functions of this module.
logger = get_logger('Cron service')


def listnodes(ns):
    """Return the node names that *ns* lists for the configured domain.

    ``ns`` must provide ``list(group)``; the first item of every returned
    entry is taken as the node name.  When nothing is listed an error is
    logged and None is returned.
    """
    group = ':Default.%s' % domain
    nlist = ns.list(group)

    if nlist:
        return [node[0] for node in nlist]

    logger.error('No nodes found, exiting')
    return None


def listsites():
    session = makelogsession()()
    sites = session.query(Site).all()