Ejemplo n.º 1
0
'Base classes and stuff for Disco jobs.'

# Copyright (c) Los Alamos National Security, LLC, and others.

import datetime
import io
import os.path
from pprint import pprint

import mr_path
mr_path.fix_pythonpath()

import disco.core
import disco.ddfs

import testable
import u
l = u.l

### Job submission helper function


def run(jobclass, args):
    '''Log the parsed arguments, then create and start a job.

    jobclass is called with args to build the job, which is started with
    the single input 'tag://' + args.input. Each attribute of args is
    logged at info level in sorted name order. When args.verbose is true,
    a debug message announcing a wait for job completion is also logged
    (the wait itself is not performed in the code visible here).'''
    l.info('starting; args:')
    settings = vars(args)
    # Attribute names are unique, so ordering by name alone matches the
    # order obtained by sorting the (name, value) pairs.
    for name in sorted(settings):
        l.info('  %-16s %s' % (name, settings[name]))
    job = jobclass(args)
    tag_url = 'tag://' + args.input
    job.run(input=[tag_url])
    l.info('started job %s' % (job.name))
    if args.verbose:
        l.debug('will wait for job to finish')
Ejemplo n.º 2
0
# FIXME: stream output into DDFS

'Functions to search tweets for a regular expression.'

import collections
import re

import mr_path
mr_path.fix_pythonpath()

import disco.util

import mr_base
import time_
import u


class Job(mr_base.Job, mr_base.TSV_Reader_Job):

   def __init__(self, args, **kw):
      '''Initialize the job from parsed arguments.

      args and any extra keyword arguments are forwarded unchanged to the
      parent initializer. args.regex is compiled and stored as self.regex,
      and the ':summary' and ':matches' tags are then passed to
      add_check_out_tags().'''
      super(Job, self).__init__(args, **kw)
      pattern = args.regex
      self.regex = re.compile(pattern)
      out_tags = (':summary', ':matches')
      self.add_check_out_tags(*out_tags)

   def map(self, t, params):
      '''Yield (day, t) when element 2 of t matches self.regex.

      The key is the first 10 characters of t[1] (presumably the
      YYYY-MM-DD part of a timestamp string -- TODO confirm against the
      reader). Nothing is yielded for a non-matching t. params is not
      used here.'''
      if not self.regex.search(t[2]):
         return
      day = t[1][:10]
      yield day, t

   def reduce(self, it, out, params):
      # build totals and lists of matches
      match_buf = u.StringIO()