def hbase_setupjob(job, args):
    """
    Set up a job to run on telemetry date ranges using data from HBase.

    Telemetry jobs expect two arguments, startdate and enddate, both in
    yyyymmdd format.
    """
    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    import com.mozilla.util.Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-YYYYMMDD> <enddate-YYYYMMDD>")

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(args[0]))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(args[1]))

    columns = [com.mozilla.util.Pair('data', 'json')]
    scans = MSTMRU.generateBytePrefixScans(startdate, enddate, dateformat,
                                           columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob('telemetry', scans, None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
def setupjob(job, args):
    """
    Set up a job to run on telemetry date ranges using data from HBase.

    Telemetry jobs expect two arguments, startdate and enddate, both in
    yyyymmdd format.
    """
    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    import com.mozilla.util.Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-YYYYMMDD> <enddate-YYYYMMDD>")

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(args[0]))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(args[1]))

    columns = [com.mozilla.util.Pair('data', 'json')]
    scans = MSTMRU.generateBytePrefixScans(startdate, enddate, dateformat,
                                           columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'telemetry', scans, None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
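# A hedged sketch of the surrounding context the telemetry setup functions
# above assume: a module-level `dateformat` constant matching the yyyyMMdd
# row-key prefix, plus map/reduce functions that consume the data:json column
# declared via org.mozilla.jydoop.hbasecolumns. The map/reduce signatures and
# the payload field used below are assumptions for illustration, not taken
# from this listing.

dateformat = "yyyyMMdd"  # assumed module-level constant used by setupjob above

def map(key, value, context):
    import json
    try:
        # value is assumed to arrive as the JSON string stored in data:json
        payload = json.loads(value)
    except ValueError:
        context.write("malformed", 1)
        return
    # count submissions per update channel ("info"/"appUpdateChannel" is a
    # hypothetical field name)
    channel = payload.get("info", {}).get("appUpdateChannel", "UNKNOWN")
    context.write(channel, 1)

def reduce(key, values, context):
    context.write(key, sum(values))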
def setupjob(job, args):
    """
    Set up a job to run full table scans for FHR data. We don't expect any
    arguments.
    """
    import org.apache.hadoop.hbase.client.Scan as Scan
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU

    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))

    # FIXME: do it without this multi-scan util
    scans = [scan]
    MSTMRU.initMultiScanTableMapperJob(
        'metrics', scans, None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")

    # job.getConfiguration().set("mapred.reduce.tasks", 4352)
    job.setNumReduceTasks(4352)
def setupjob(job, args):
    """
    Set up a job to run full table scans for FHR data. We don't expect any
    arguments.
    """
    import org.apache.hadoop.hbase.client.Scan as Scan
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU

    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))

    # FIXME: do it without this multi-scan util
    scans = [scan]
    MSTMRU.initMultiScanTableMapperJob('metrics', scans, None, None, None, job)

    # inform HBaseDriver about the columns we expect to receive
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
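# The two FHR setup variants above differ only in whether they pin the number
# of reduce tasks and in which driver class the comment names. A hedged
# refactoring sketch, using only the calls already made above (the helper
# name and keyword argument are hypothetical):

def _setup_fhr_scan(job, num_reduce_tasks=None):
    import org.apache.hadoop.hbase.client.Scan as Scan
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU

    scan = Scan()
    scan.setCaching(500)
    scan.setCacheBlocks(False)
    scan.addColumn(bytearray('data'), bytearray('json'))

    MSTMRU.initMultiScanTableMapperJob('metrics', [scan], None, None, None, job)
    job.getConfiguration().set("org.mozilla.jydoop.hbasecolumns", "data:json")
    if num_reduce_tasks is not None:
        job.setNumReduceTasks(num_reduce_tasks)

# Each variant then reduces to a one-line wrapper, e.g.
# _setup_fhr_scan(job, num_reduce_tasks=4352) or _setup_fhr_scan(job).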
def setupjob(job, args):
    """
    Set up a job to run on crash-stats date ranges.

    Expects two arguments: startdate (yymmdd) and enddate (yymmdd).
    """
    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-yymmdd> <enddate-yymmdd>")
    startarg, endarg = args

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(startarg))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(endarg))

    columns = [Pair(family, qualifier) for family, qualifier in columnlist]
    scans = MSTMRU.generateHexPrefixScans(startdate, enddate, dateformat,
                                          columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'crash_reports', scans, None, None, None, job)

    # inform HadoopDriver about the columns we expect to receive
    job.getConfiguration().set(
        "org.mozilla.jydoop.hbasecolumns",
        ','.join(':'.join(column) for column in columnlist))
def setupjob(job, args):
    """
    Set up a job to run on crash-stats date ranges.

    Expects two arguments: startdate (yymmdd) and enddate (yymmdd).
    """
    import java.text.SimpleDateFormat as SimpleDateFormat
    import java.util.Calendar as Calendar
    import com.mozilla.hadoop.hbase.mapreduce.MultiScanTableMapReduceUtil as MSTMRU
    from com.mozilla.util import Pair

    if len(args) != 2:
        raise Exception("Usage: <startdate-yymmdd> <enddate-yymmdd>")
    startarg, endarg = args

    sdf = SimpleDateFormat(dateformat)
    startdate = Calendar.getInstance()
    startdate.setTime(sdf.parse(startarg))
    enddate = Calendar.getInstance()
    enddate.setTime(sdf.parse(endarg))

    columns = [Pair(family, qualifier) for family, qualifier in columnlist]
    scans = MSTMRU.generateHexPrefixScans(startdate, enddate, dateformat,
                                          columns, 500, False)
    MSTMRU.initMultiScanTableMapperJob(
        'crash_reports', scans, None, None, None, job)

    # inform HBaseDriver about the columns we expect to receive
    job.getConfiguration().set(
        "org.mozilla.jydoop.hbasecolumns",
        ','.join(':'.join(column) for column in columnlist))
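# The crash-stats setup functions above reference module-level `dateformat`
# and `columnlist` values that are not shown in this listing. A hedged sketch
# of the shape they need: `dateformat` must match the yyMMdd row-key prefix,
# and `columnlist` must yield (family, qualifier) string pairs. The concrete
# column names below are illustrative placeholders, not taken from this
# listing.

dateformat = "yyMMdd"
columnlist = [
    ("meta_data", "json"),       # hypothetical column family:qualifier
    ("processed_data", "json"),  # hypothetical column family:qualifier
]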