예제 #1
0
파일: services.py 프로젝트: nyimbi/Domo
    def __init__(self):
        """Initialise the ObjBase parent, the worker table and the logger."""
        ObjBase.__init__(self)

        # Workers are tracked by name: {name: worker object}.
        self.workers = {}

        # The logger name embeds whatever getnodename() reports.
        node_name = getnodename()
        self.logger = get_logger('WorkerService_%s' % node_name)
예제 #2
0
파일: crawler.py 프로젝트: nyimbi/Domo
def create_multi(options):
    """Build a pycurl multi handle together with its pool of easy handles.

    Args:
        options: mapping of lower-case option names to lists of values; only
            the first element of each list is used. 'maxconnections' sets
            the pool size and defaults to 5.

    Returns:
        A ``(multi, freelist)`` tuple where ``freelist`` is a shallow copy of
        ``multi.handles``, or ``None`` when at least one handle is requested
        but ``settings.CRAWL_OPTIONS`` is missing.
    """
    logger = get_logger('MULTIHANDLER')
    m = pycurl.CurlMulti()
    m.handles = []

    # default number of connections is 5
    # some options are hardcoded, nonprovided options
    # might be problematic when settings.py is used
    connection_count = int(options.get('maxconnections', [5])[0])
    logger.debug('maxconnections %d' % connection_count)

    for i in range(connection_count):
        # Check the settings before allocating a handle so that nothing is
        # created only to be abandoned on the error path.
        if not hasattr(settings, 'CRAWL_OPTIONS'):
            logger.error('No options for crawler found')
            return None

        c = pycurl.Curl()
        c.name = 'curl%s' % i

        # TODO: warn here about required options that were not provided

        for opt, val in settings.CRAWL_OPTIONS.items():
            # Use the user provided value when there is a non-empty one,
            # otherwise fall back to the default from settings.
            supplied = options.get(opt.lower())
            optval = val
            if supplied and supplied[0]:
                raw = supplied[0]
                if t.match(raw):
                    # Values accepted by the module-level pattern `t` are
                    # converted to int (so "0" becomes 0, not the string).
                    optval = int(raw)
                else:
                    optval = raw

            # Apply the resolved value; previously the default `val` was
            # always passed and the user supplied value was ignored.
            c.setopt(getattr(pycurl, opt), optval)
        m.handles.append(c)

    return m, m.handles[:]  # multi, freelist
예제 #3
0
파일: crawler.py 프로젝트: altunyurt/Domo
def create_multi(options):
    """Build a pycurl multi handle together with its pool of easy handles.

    Args:
        options: mapping of lower-case option names to lists of values; only
            the first element of each list is used. "maxconnections" sets
            the pool size and defaults to 5.

    Returns:
        A ``(multi, freelist)`` tuple where ``freelist`` is a shallow copy of
        ``multi.handles``, or ``None`` when at least one handle is requested
        but ``settings.CRAWL_OPTIONS`` is missing.
    """
    logger = get_logger("MULTIHANDLER")
    m = pycurl.CurlMulti()
    m.handles = []

    # default number of connections is 5
    # some options are hardcoded, nonprovided options
    # might be problematic when settings.py is used
    connection_count = int(options.get("maxconnections", [5])[0])
    logger.debug("maxconnections %d" % connection_count)

    for i in range(connection_count):
        # Check the settings before allocating a handle so that nothing is
        # created only to be abandoned on the error path.
        if not hasattr(settings, "CRAWL_OPTIONS"):
            logger.error("No options for crawler found")
            return None

        c = pycurl.Curl()
        c.name = "curl%s" % i

        # TODO: warn here about required options that were not provided

        for opt, val in settings.CRAWL_OPTIONS.items():
            # Use the user provided value when there is a non-empty one,
            # otherwise fall back to the default from settings.
            supplied = options.get(opt.lower())
            optval = val
            if supplied and supplied[0]:
                raw = supplied[0]
                if t.match(raw):
                    # Values accepted by the module-level pattern `t` are
                    # converted to int (so "0" becomes 0, not the string).
                    optval = int(raw)
                else:
                    optval = raw

            # Apply the resolved value; previously the default `val` was
            # always passed and the user supplied value was ignored.
            c.setopt(getattr(pycurl, opt), optval)
        m.handles.append(c)

    return m, m.handles[:]  # multi, freelist
예제 #4
0
파일: darc.py 프로젝트: altunyurt/Domo
    def __init__(self, filename):
        """Open a gzip archive for reading.

        Args:
            filename: path of the gzip file to open.

        An IOError is logged and leaves ``self.file`` set to None instead of
        propagating.
        """
        self.logger = get_logger('DArcReader')

        try:
            handle = gzip.open(filename)
        except IOError:
            # Every IOError is reported as a missing file.
            self.logger.error('File not found: %s' % filename)
            handle = None

        self.file = handle
예제 #5
0
파일: darc.py 프로젝트: altunyurt/Domo
    def __init__(self, filename):
        """Open a gzip temp file for writing and prepare the redirect regex.

        Args:
            filename: archive name; everything from the first '.' onwards is
                dropped before '_temp' is appended for the output file.
        """
        # Keep only the text before the first dot (trims the extension).
        base_name = filename.partition('.')[0]
        self.filename = base_name

        temp_name = '%s_temp' % base_name
        self.file = gzip.GzipFile(temp_name, 'wb')

        # Captures the target of a Location header line; on redirects the
        # new url should be rewritten according to regex rules.
        location_pattern = r'Location: (.*)\r\n'
        self.regx = re.compile(location_pattern)

        self.logger = get_logger('DArcWriter')
예제 #6
0
파일: darc.py 프로젝트: nyimbi/Domo
    def __init__(self, filename):
        """Open a gzip archive for reading.

        Args:
            filename: path of the gzip file to open.

        An IOError is logged and leaves ``self.file`` set to None instead of
        propagating.
        """
        self.logger = get_logger('DArcReader')

        try:
            handle = gzip.open(filename)
        except IOError:
            # Every IOError is reported as a missing file.
            self.logger.error('File not found: %s' % filename)
            handle = None

        self.file = handle
예제 #7
0
파일: darc.py 프로젝트: nyimbi/Domo
    def __init__(self, filename):
        """Open a gzip temp file for writing and prepare the redirect regex.

        Args:
            filename: archive name; everything from the first '.' onwards is
                dropped before '_temp' is appended for the output file.
        """
        # Keep only the text before the first dot (trims the extension).
        base_name = filename.partition('.')[0]
        self.filename = base_name

        temp_name = '%s_temp' % base_name
        self.file = gzip.GzipFile(temp_name, 'wb')

        # Captures the target of a Location header line; on redirects the
        # new url should be rewritten according to regex rules.
        location_pattern = r'Location: (.*)\r\n'
        self.regx = re.compile(location_pattern)

        self.logger = get_logger('DArcWriter')
예제 #8
0
파일: __init__.py 프로젝트: altunyurt/Domo
    def __init__(self, config, report=None, status=None):
        """Keep the configuration and shared objects and create a named logger.

        Args:
            config: mapping with an 'options' mapping whose 'name' entry is a
                list; its first element becomes ``self.name``.
            report: shared report object, stored as given.
            status: shared status object, stored as given.
        """
        # TODO: format configuration items, everything is in list form for now
        option_map = config.get('options')
        name_values = option_map.get('name')
        self.name = name_values[0]

        # The logger is named after the configured name.
        self.logger = get_logger(self.name)

        # Objects shared with the caller.
        self.report = report
        self.status = status
        self.config = config
예제 #9
0
파일: __init__.py 프로젝트: nyimbi/Domo
    def __init__(self, config, report=None, status=None):
        """Keep the configuration and shared objects and create a named logger.

        Args:
            config: mapping with an 'options' mapping whose 'name' entry is a
                list; its first element becomes ``self.name``.
            report: shared report object, stored as given.
            status: shared status object, stored as given.
        """
        # TODO: format configuration items, everything is in list form for now
        option_map = config.get('options')
        name_values = option_map.get('name')
        self.name = name_values[0]

        # The logger is named after the configured name.
        self.logger = get_logger(self.name)

        # Objects shared with the caller.
        self.report = report
        self.status = status
        self.config = config
예제 #10
0
def listprofiles(*args):
    """Log the names of all stored profiles, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('ListProfiles')
    db = Session()
    found = db.query(Profile).all()

    if found:
        names = [profile.name for profile in found]
        logger.info('Profiles found: %s' % ', '.join(names))
    else:
        logger.error('No profiles found')

    # The process always terminates here, whether or not profiles exist.
    sys.exit(0)
예제 #11
0
    def __init__(self, config):
        """Set up the URL sets and instantiate the configured plugins.

        Args:
            config: mapping with an 'options' mapping (its 'seeds' entry is a
                list whose first element is a comma-separated string of
                seed URLs) and a 'plugins' mapping of plugin name to
                parameter list.

        A plugin that fails to load is logged with its traceback and
        skipped; construction continues with the remaining plugins.
        """
        if not hasattr(self, 'logger'):
            self.logger = get_logger('UrlsContainer')

        self.options = config.get('options')
        self.plugins = config.get('plugins')
        self.done = Set([])
        # TODO: format configuration objects, everything is in list form
        self.queue = Set([
            item.strip().encode('utf-8')
            for item in self.options.get('seeds')[0].split(',')
        ])
        self.temp = Set([])
        self.failed = Set([])
        self.retries = dict()
        self.extractors = []
        self.filters = []
        self.transformers = []

        # The plugin kind is decided by the suffix of its registered name.
        buckets = (
            ("Extractor", self.extractors),
            ("Filter", self.filters),
            ("Transform", self.transformers),
        )

        def bucket_for(plugin_name):
            # Return the list a plugin of this name belongs to, or None.
            for suffix, bucket in buckets:
                if plugin_name.endswith(suffix):
                    return bucket
            return None

        # TODO: rewrite
        # plugins = {plug: [params], plug2: [params2]}
        # plugins with no params where args_required will be discarded
        for name, parms in self.plugins.items():
            try:
                cls = registry.get(name)
                if cls.opts.get('args_required') and not parms[0]:
                    continue

                bucket = bucket_for(name)
                if bucket is not None:
                    bucket.append(cls(parms))
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not swallowed while loading plugins.
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))

        # also add enabled but non visible plugins to scene
        for name, cls in registry.items():
            try:
                if cls.enabled and not cls.visible:
                    bucket = bucket_for(name)
                    if bucket is not None:
                        bucket.append(cls())
            except Exception:
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))
예제 #12
0
def listnodes(*args):
    """Log the nodes listed under this domain's group, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('Listnodes')

    locator = Pyro.naming.NameServerLocator()
    name_server = locator.getNS()
    entries = name_server.list(':Default.%s' % domain)

    if entries:
        # The first element of every entry is what gets reported.
        node_names = [entry[0] for entry in entries]
        logger.info('Nodes found: %s' % ", ".join(node_names))
    else:
        logger.error('No nodes found')

    sys.exit(0)
예제 #13
0
파일: domo_client.py 프로젝트: nyimbi/Domo
def listprofiles(*args):
    """Log the names of all stored profiles, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('ListProfiles')
    db = Session()
    found = db.query(Profile).all()

    if found:
        names = [profile.name for profile in found]
        logger.info('Profiles found: %s' % ', '.join(names))
    else:
        logger.error('No profiles found')

    # The process always terminates here, whether or not profiles exist.
    sys.exit(0)
예제 #14
0
파일: domo_client.py 프로젝트: nyimbi/Domo
def listnodes(*args):
    """Log the nodes listed under this domain's group, then exit with status 0.

    Positional arguments are accepted but not used.
    """
    logger = get_logger('Listnodes')

    locator = Pyro.naming.NameServerLocator()
    name_server = locator.getNS()
    entries = name_server.list(':Default.%s' % domain)

    if entries:
        # The first element of every entry is what gets reported.
        node_names = [entry[0] for entry in entries]
        logger.info('Nodes found: %s' % ", ".join(node_names))
    else:
        logger.error('No nodes found')

    sys.exit(0)
예제 #15
0
파일: container.py 프로젝트: altunyurt/Domo
    def __init__(self, config):
        """Set up the URL sets and instantiate the configured plugins.

        Args:
            config: mapping with an 'options' mapping (its 'seeds' entry is a
                list whose first element is a comma-separated string of
                seed URLs) and a 'plugins' mapping of plugin name to
                parameter list.

        A plugin that fails to load is logged with its traceback and
        skipped; construction continues with the remaining plugins.
        """
        if not hasattr(self, 'logger'):
            self.logger = get_logger('UrlsContainer')

        self.options = config.get('options')
        self.plugins = config.get('plugins')
        self.done = Set([])
        # TODO: format configuration objects, everything is in list form
        self.queue = Set([
            item.strip().encode('utf-8')
            for item in self.options.get('seeds')[0].split(',')
        ])
        self.temp = Set([])
        self.failed = Set([])
        self.retries = dict()
        self.extractors = []
        self.filters = []
        self.transformers = []

        # The plugin kind is decided by the suffix of its registered name.
        buckets = (
            ("Extractor", self.extractors),
            ("Filter", self.filters),
            ("Transform", self.transformers),
        )

        def bucket_for(plugin_name):
            # Return the list a plugin of this name belongs to, or None.
            for suffix, bucket in buckets:
                if plugin_name.endswith(suffix):
                    return bucket
            return None

        # TODO: rewrite
        # plugins = {plug: [params], plug2: [params2]}
        # plugins with no params where args_required will be discarded
        for name, parms in self.plugins.items():
            try:
                cls = registry.get(name)
                if cls.opts.get('args_required') and not parms[0]:
                    continue

                bucket = bucket_for(name)
                if bucket is not None:
                    bucket.append(cls(parms))
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not swallowed while loading plugins.
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))

        # also add enabled but non visible plugins to scene
        for name, cls in registry.items():
            try:
                if cls.enabled and not cls.visible:
                    bucket = bucket_for(name)
                    if bucket is not None:
                        bucket.append(cls())
            except Exception:
                self.logger.error('Loading of plugin %s failed\n\n %s' %
                                  (name, traceback.format_exc()))
예제 #16
0
    def __init__(self, *args, **kwargs):
        """Create the logger and record the arguments given to the plugin.

        When the class requires arguments and none are given, the instance
        is marked disabled (``self.enabled = False``) and ``self.args`` is
        not assigned. Keyword arguments are accepted but not used.
        """
        plugin_name = self.__class__.__name__
        self.logger = get_logger(plugin_name)

        if not self.args_required:
            self.args = []
            return

        if args:
            # always in ['blah blah'] form
            self.args = copy(*args)
            return

        self.logger.info('%s requires arguments, but none given. therefore this module is disabled' % plugin_name)
        self.enabled = False
예제 #17
0
    def __init__(self, *args, **kwargs):
        """Create the logger and record the arguments given to the plugin.

        When the class requires arguments and none are given, the instance
        is marked disabled (``self.enabled = False``) and ``self.args`` is
        not assigned. Keyword arguments are accepted but not used.
        """
        plugin_name = self.__class__.__name__
        self.logger = get_logger(plugin_name)

        if not self.args_required:
            self.args = []
            return

        if args:
            # always in ['blah blah'] form
            self.args = copy(*args)
            return

        self.logger.info("%s requires arguments, but none given. therefore this module is disabled" % plugin_name)
        self.enabled = False
예제 #18
0
파일: domo_client.py 프로젝트: nyimbi/Domo
def runcommand(options, parser):
    """Run a job-service command; only 'create' is handled in this function.

    Args:
        options: parsed options exposing ``command``, ``node`` and
            ``profile`` attributes.
        parser: option parser whose help is printed when ``command`` or
            ``node`` is missing.

    Returns:
        0 when a worker was created and its Job row committed; -1 when the
        profile lookup or the remote create call raised, or when the service
        reported no worker. The early exits return whatever
        ``parser.print_help()`` / ``logger.error()`` return, and any command
        other than 'create' falls through and returns None.
    """
    logger = get_logger('RunCommand')

    if not options.command or not options.node:
        return parser.print_help()

    # NOTE: name-server resolution is disabled; the job service is reached
    # through a fixed local URI, so options.node is validated but not used.
    #ns = Pyro.naming.NameServerLocator().getNS()
    #uri = ns.resolve('%s.%s.jobservice' % (domain, options.node))
    #js = Pyro.core.getProxyForURI(uri)
    js = Pyro.core.getProxyForURI("PYROLOC://localhost:7766/jobservice")

    if options.command == 'create':
        if not options.profile:
            return logger.error(
                'crate should be called with profile name: -n nodename -c create -p profile'
            )

        sess = Session()
        try:
            try:
                pf = sess.query(Profile).filter(
                    profile.c.name == options.profile).first()
                # A missing profile makes pf None, so the attribute access
                # below raises and is reported by the handler.
                status, workername = js.create(pf.configuration)
            except Exception as e:
                logger.error('No profile found with name: %s' % options.profile)
                logger.error("".join(getPyroTraceback(e)))
                return -1

            if status:
                j = Job(workername, 'paused')
                pf.jobs.append(j)
                sess.add(j)
                sess.commit()
                logger.info("worker with name %s created" % workername)
                return 0

            logger.error('no worker created: %s' % workername)
            return -1
        finally:
            # Release the session on every path, not only on success.
            sess.close()
예제 #19
0
def runcommand(options, parser):
    """Run a job-service command; only 'create' is handled in this function.

    Args:
        options: parsed options exposing ``command``, ``node`` and
            ``profile`` attributes.
        parser: option parser whose help is printed when ``command`` or
            ``node`` is missing.

    Returns:
        0 when a worker was created and its Job row committed; -1 when the
        profile lookup or the remote create call raised, or when the service
        reported no worker. The early exits return whatever
        ``parser.print_help()`` / ``logger.error()`` return, and any command
        other than 'create' falls through and returns None.
    """
    logger = get_logger('RunCommand')

    if not options.command or not options.node:
        return parser.print_help()

    # NOTE: name-server resolution is disabled; the job service is reached
    # through a fixed local URI, so options.node is validated but not used.
    #ns = Pyro.naming.NameServerLocator().getNS()
    #uri = ns.resolve('%s.%s.jobservice' % (domain, options.node))
    #js = Pyro.core.getProxyForURI(uri)
    js = Pyro.core.getProxyForURI("PYROLOC://localhost:7766/jobservice")

    if options.command == 'create':
        if not options.profile:
            return logger.error('crate should be called with profile name: -n nodename -c create -p profile')

        sess = Session()
        try:
            try:
                pf = sess.query(Profile).filter(profile.c.name == options.profile).first()
                # A missing profile makes pf None, so the attribute access
                # below raises and is reported by the handler.
                status, workername = js.create(pf.configuration)
            except Exception as e:
                logger.error('No profile found with name: %s' % options.profile)
                logger.error("".join(getPyroTraceback(e)))
                return -1

            if status:
                j = Job(workername, 'paused')
                pf.jobs.append(j)
                sess.add(j)
                sess.commit()
                logger.info("worker with name %s created" % workername)
                return 0

            logger.error('no worker created: %s' % workername)
            return -1
        finally:
            # Release the session on every path, not only on success.
            sess.close()
예제 #20
0
파일: darc.py 프로젝트: nyimbi/Domo
def indexer(filename, max_count=100):
    """Add one Index row per archive chunk, committing in batches.

    Args:
        filename: archive name; the text between the first and second '_'
            is used as the timestamp (names are expected to look like
            profilename_timestamp_node.arc).
        max_count: number of rows added between intermediate commits.

    Raises:
        IndexError: when ``filename`` contains no '_'.
    """
    logger = get_logger('Indexer')

    count = offset = 0
    sess = Session()
    d = DArcReader('%s' % filename)
    try:
        logger.debug('Archive file %s opened with pointer %s' % (filename, d))
        timestamp = filename.split('_')[1]  # always profilename_timestamp_node.arc

        for chunk in d.chunks():
            # Each row records the chunk's first field together with the
            # offset at which the chunk started.
            count += 1

            index = Index(filename, timestamp, chunk[0], offset)
            sess.add(index)
            if count % max_count == 0:
                sess.commit()
                # Close the finished session before starting a new one so
                # sessions do not pile up across batches.
                sess.close()
                sess = Session()

            # The reader position after this chunk is where the next begins.
            offset = d.tell()
        sess.commit()
    finally:
        # Release the reader and the session even when indexing fails.
        try:
            d.close()
        finally:
            sess.close()
예제 #21
0
파일: darc.py 프로젝트: altunyurt/Domo
def indexer(filename, max_count=100):
    """Add one Index row per archive chunk, committing in batches.

    Args:
        filename: archive name; the text between the first and second '_'
            is used as the timestamp (names are expected to look like
            profilename_timestamp_node.arc).
        max_count: number of rows added between intermediate commits.

    Raises:
        IndexError: when ``filename`` contains no '_'.
    """
    logger = get_logger('Indexer')

    count = offset = 0
    sess = Session()
    d = DArcReader('%s' % filename)
    try:
        logger.debug('Archive file %s opened with pointer %s' % (filename, d))
        timestamp = filename.split('_')[1]  # always profilename_timestamp_node.arc

        for chunk in d.chunks():
            # Each row records the chunk's first field together with the
            # offset at which the chunk started.
            count += 1

            index = Index(filename, timestamp, chunk[0], offset)
            sess.add(index)
            if count % max_count == 0:
                sess.commit()
                # Close the finished session before starting a new one so
                # sessions do not pile up across batches.
                sess.close()
                sess = Session()

            # The reader position after this chunk is where the next begins.
            offset = d.tell()
        sess.commit()
    finally:
        # Release the reader and the session even when indexing fails.
        try:
            d.close()
        finally:
            sess.close()
예제 #22
0
파일: domo_cron.py 프로젝트: altunyurt/Domo
from Pyro.util import getPyroTraceback
from datetime import datetime
from domo import settings
from domo.interfaces.db import Site, Job, makelogsession
from optparse import OptionParser
from sqlalchemy.exceptions import InvalidRequestError
from domo.interfaces.logger import get_logger
from datetime import datetime, timedelta
import Pyro.core
import Pyro.naming
import sys
import traceback

# Domain value from the project settings; used to build the name-server
# group path (':Default.<domain>') when listing nodes.
domain = settings.DOMAIN
# Module-level logger used by the functions in this script.
logger = get_logger('Cron service')

def listnodes(ns):
    """Return the names of the nodes listed under this domain's group.

    Args:
        ns: name-server object providing ``list(group_path)``.

    Returns:
        A list with the first element of every listed entry, or None (after
        logging an error) when the listing is empty.
    """
    entries = ns.list(':Default.%s' % domain)

    if entries:
        return [entry[0] for entry in entries]

    logger.error('No nodes found, exiting')
    return None

def listsites():
    """Query all Site rows and log an error when there are none.

    NOTE(review): as shown, the function has no return statement and so
    always returns None; the listing may be cut short here. Confirm against
    the full file before relying on a return value.
    """
    # makelogsession() returns a callable; calling that yields the session.
    session = makelogsession()()
    sites = session.query(Site).all()
    
    if not sites:
        logger.error('No sites found, exiting')
예제 #23
0
from Pyro.util import getPyroTraceback
from datetime import datetime
from domo import settings
from domo.interfaces.db import Site, Job, makelogsession
from optparse import OptionParser
from sqlalchemy.exceptions import InvalidRequestError
from domo.interfaces.logger import get_logger
from datetime import datetime, timedelta
import Pyro.core
import Pyro.naming
import sys
import traceback

# Domain value from the project settings; used to build the name-server
# group path (':Default.<domain>') when listing nodes.
domain = settings.DOMAIN
# Module-level logger used by the functions in this script.
logger = get_logger('Cron service')


def listnodes(ns):
    """Return the names of the nodes listed under this domain's group.

    Args:
        ns: name-server object providing ``list(group_path)``.

    Returns:
        A list with the first element of every listed entry, or None (after
        logging an error) when the listing is empty.
    """
    entries = ns.list(':Default.%s' % domain)

    if entries:
        return [entry[0] for entry in entries]

    logger.error('No nodes found, exiting')
    return None


def listsites():
    session = makelogsession()()
    sites = session.query(Site).all()