Beispiel #1
0
    def test_range_dates(self):
        "Test range_dates function with string and integer date boundaries"
        # every entry is (first date, last date, expected list of days);
        # the first two ranges cross a month and a year boundary
        cases = [
            ('20160129', '20160201',
             ['2016/01/29', '2016/01/30', '2016/01/31', '2016/02/01']),
            ('20151229', '20160101',
             ['2015/12/29', '2015/12/30', '2015/12/31', '2016/01/01']),
            (20160101, 20160102, ['2016/01/01', '2016/01/02']),
            (20160101, 20160101, ['2016/01/01']),
        ]
        for first, last, expect in cases:
            self.assertEqual(expect, range_dates([first, last]))
Beispiel #2
0
    def test_range_dates(self):
        "Test range_dates function for several date ranges"

        def check(first, last, expect):
            "Compare range_dates output for [first, last] with expect"
            self.assertEqual(expect, range_dates([first, last]))

        # dates given as strings, range crosses a month boundary
        check("20160129", "20160201",
              ["2016/01/29", "2016/01/30", "2016/01/31", "2016/02/01"])
        # dates given as strings, range crosses a year boundary
        check("20151229", "20160101",
              ["2015/12/29", "2015/12/30", "2015/12/31", "2016/01/01"])
        # dates given as integers
        check(20160101, 20160102, ["2016/01/01", "2016/01/02"])
        # identical start and end dates yield a single day
        check(20160101, 20160101, ["2016/01/01"])
Beispiel #3
0
def main():
    """
    Main function: parse command line options, build the list of HDFS
    directories for the requested time range, call run() and dispatch
    its results.

    When the scripts option is set the function calls scripts() and exits
    with status 0. The time range is read from the 'timerange' entry found
    under the 'spec' key of the JSON spec file; when it is absent the
    range defaults to [yesterday, today] expressed as YYYYMMDD integers.

    Results are sent through postdata() when the store option is given,
    through StompAMQ when the amq option is given, and printed otherwise.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    time0 = time.time()

    if opts.scripts:
        scripts()
        sys.exit(0)

    # sample the clock once so both ends of the default range derive from
    # the same instant (two separate calls may straddle midnight)
    today = datetime.datetime.today()
    todate = int(today.strftime("%Y%m%d"))
    fromdate = int((today - datetime.timedelta(days=1)).strftime("%Y%m%d"))

    spec = {}
    if opts.spec:
        # context manager guarantees the spec file handle is closed
        with open(opts.spec) as istream:
            spec = json.load(istream)
    timerange = spec.get('spec', {}).get('timerange', [fromdate, todate])

    if opts.hdir == HDIR:
        hdir = opts.hdir.split()
        if len(hdir) == 1:
            # single default directory: expand it into one sub-directory
            # per date, skipping dates already present in the path
            base = hdir[0]
            hdir = [os.path.join(base, tval)
                    for tval in range_dates(timerange) if tval not in base]
    else:
        hdir = opts.hdir
    results = run(opts.schema, hdir, opts.script, opts.spec, opts.verbose,
                  opts.rout, opts.yarn)
    if opts.store:
        data = {"results": results, "ts": time.time(),
                "etime": time.time() - time0}
        # the hash is computed before 'wmaid' and 'dtype' are added
        data['wmaid'] = opts.wmaid if opts.wmaid else wmaHash(data)
        data['dtype'] = 'job'
        pdata = dict(job=data)
        postdata(opts.store, pdata, opts.ckey, opts.cert, opts.verbose)
    elif opts.amq:
        creds = credentials(opts.amq)
        # dereference the credentials only after checking they exist
        if creds and StompAMQ:
            host, port = creds['host_and_ports'].split(':')
            port = int(port)  # the port is passed on as an integer
            print("### Send %s docs via StompAMQ" % len(results))
            amq = StompAMQ(creds['username'], creds['password'],
                           creds['producer'], creds['topic'], [(host, port)])
            amq.send(results)
    else:
        print(results)
Beispiel #4
0
def main():
    """
    Main function: parse command line options, build the list of HDFS
    directories for the requested time range, call run() and dispatch
    its results.

    When the scripts option is set the function calls scripts() and exits
    with status 0. The spec option may be either a path to a JSON file or
    a JSON string; the time range is read from the 'timerange' entry found
    under its 'spec' key and defaults to [yesterday, today] expressed as
    YYYYMMDD integers. A spec which can not be read or parsed is ignored
    (reported only in verbose mode) and the default time range is used.

    Results are sent through postdata() when the store option is given,
    through StompAMQ when the amq option is given, and printed otherwise.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    time0 = time.time()

    if opts.scripts:
        scripts()
        sys.exit(0)

    verbose = opts.verbose
    # sample the clock once so both ends of the default range derive from
    # the same instant (two separate calls may straddle midnight)
    today = datetime.datetime.today()
    todate = int(today.strftime("%Y%m%d"))
    fromdate = int((today - datetime.timedelta(days=1)).strftime("%Y%m%d"))

    spec = {}
    if opts.spec:
        try:
            if os.path.isfile(opts.spec):
                # context manager guarantees the file handle is closed
                with open(opts.spec) as istream:
                    spec = json.load(istream)
            else:
                spec = json.loads(opts.spec)
        except (IOError, OSError, TypeError, ValueError) as exp:
            # best effort: fall back to the default time range, but do not
            # hide the reason from a user who asked for verbose output
            if verbose:
                print("### Unable to load spec %s: %s" % (opts.spec, exp))
    timerange = spec.get('spec', {}).get('timerange', [fromdate, todate])
    if timerange and verbose:
        print("### TimeRang: %s" % timerange)

    hdir = opts.hdir
    if timerange:
        pat = re.compile(".*/20[0-9][0-9].*")
        # expand only a single directory whose path does not already
        # contain a /20xx component
        if len(hdir.split()) == 1 and not pat.match(hdir):
            base = hdir.split()[0]
            hdir = [os.path.join(base, tval)
                    for tval in range_dates(timerange) if tval not in base]
    if verbose:
        print("### HDIR: %s" % hdir)
    results = run(opts.schema, hdir, opts.script, opts.spec, verbose,
                  opts.rout, opts.yarn)
    if opts.store:
        data = {
            "results": results,
            "ts": time.time(),
            "etime": time.time() - time0
        }
        # the hash is computed before 'wmaid' and 'dtype' are added
        data['wmaid'] = opts.wmaid if opts.wmaid else wmaHash(data)
        data['dtype'] = 'job'
        pdata = dict(job=data)
        postdata(opts.store, pdata, opts.ckey, opts.cert, verbose)
    elif opts.amq:
        creds = credentials(opts.amq)
        # dereference the credentials only after checking they exist
        if creds and StompAMQ:
            host, port = creds['host_and_ports'].split(':')
            port = int(port)
            print("### Send %s docs via StompAMQ" % len(results))
            amq = StompAMQ(creds['username'], creds['password'],
                           creds['producer'], creds['topic'],
                           validation_schema=None,
                           host_and_ports=[(host, port)])
            data = []
            for doc in results:
                hid = doc.get("hash", 1)
                if '_id' in doc:
                    del doc['_id']  # delete ObjectID from MongoDB
                producer = "wmarchive"
                tstamp = int(time.time()) * 1000
                notification, _, _ = amq.make_notification(doc,
                                                           hid,
                                                           producer=producer,
                                                           ts=tstamp,
                                                           dataSubfield="")
                data.append(notification)
            results = amq.send(data)
            print("### results from AMQ %s" % len(results))
    else:
        if isinstance(results, list):
            print("### number of results %s" % len(results))
            for doc in results:
                if '_id' in doc:
                    del doc['_id']  # delete ObjectID from MongoDB
                try:
                    print(json.dumps(doc))
                except (TypeError, ValueError):
                    # document is not JSON serializable, print it as is
                    print(doc)
        else:
            print(results)
Beispiel #5
0
def main():
    """
    Main function: parse command line options, build the list of existing
    HDFS directories for the requested time range and either call
    runActionsHistoryQuery() (logfail option) or call run() and dispatch
    its results.

    When the scripts option is set the function calls scripts() and exits
    with status 0. The spec option may be either a path to a JSON file or
    a JSON string; the time range is read from the 'timerange' entry found
    under its 'spec' key and defaults to [yesterday, today] expressed as
    YYYYMMDD integers. A spec which can not be read or parsed is ignored
    (reported only in verbose mode) and the default time range is used.

    Results of run() are sent through postdata() when the store option is
    given, through StompAMQ when the amq option is given, and printed
    otherwise.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    time0 = time.time()

    if opts.scripts:
        scripts()
        sys.exit(0)

    verbose = opts.verbose
    # sample the clock once so both ends of the default range derive from
    # the same instant (two separate calls may straddle midnight)
    today = datetime.datetime.today()
    todate = int(today.strftime("%Y%m%d"))
    fromdate = int((today - datetime.timedelta(days=1)).strftime("%Y%m%d"))

    spec = {}
    if opts.spec:
        try:
            if os.path.isfile(opts.spec):
                # context manager guarantees the file handle is closed
                with open(opts.spec) as istream:
                    spec = json.load(istream)
            else:
                spec = json.loads(opts.spec)
        except (IOError, OSError, TypeError, ValueError) as exp:
            # best effort: fall back to the default time range, but do not
            # hide the reason from a user who asked for verbose output
            if verbose:
                print("### Unable to load spec %s: %s" % (opts.spec, exp))
    timerange = spec.get('spec', {}).get('timerange', [fromdate, todate])
    if timerange and verbose:
        print("### TimeRang: %s" % timerange)

    hdir = opts.hdir
    if timerange:
        pat = re.compile(".*/20[0-9][0-9].*")
        # expand only a single directory whose path does not already
        # contain a /20xx component
        if len(hdir.split()) == 1 and not pat.match(hdir):
            base = hdir.split()[0]
            hdirs = []
            for tval in range_dates(timerange):
                if tval in base:
                    continue
                hdfs_file_path = os.path.join(base, tval)
                # check whether the hdfs path exists
                cmd = ['hdfs', 'dfs', '-test', '-d', hdfs_file_path]
                ret, _, _ = run_cmd(cmd)
                if ret == 0:
                    hdirs.append(hdfs_file_path)
                else:
                    # print() with a single formatted string yields the
                    # same output under Python 2 and Python 3
                    print("Path does not exist: %s" % hdfs_file_path)
            hdir = hdirs
    if verbose:
        print("### HDIR: %s" % hdir)
    if opts.logfail:
        print("Start query")
        runActionsHistoryQuery(opts.schema, hdir, verbose, opts.yarn)
        print("Finish query")
    else:
        results = run(opts.schema, hdir, opts.script, opts.spec, verbose,
                      opts.rout, opts.yarn)
        if opts.store:
            data = {
                "results": results,
                "ts": time.time(),
                "etime": time.time() - time0
            }
            # the hash is computed before 'wmaid' and 'dtype' are added
            data['wmaid'] = opts.wmaid if opts.wmaid else wmaHash(data)
            data['dtype'] = 'job'
            pdata = dict(job=data)
            postdata(opts.store, pdata, opts.ckey, opts.cert, verbose)
        elif opts.amq:
            creds = credentials(opts.amq)
            # dereference the credentials only after checking they exist
            if creds and StompAMQ:
                host, port = creds['host_and_ports'].split(':')
                port = int(port)
                print("### Send %s docs via StompAMQ" % len(results))
                amq = StompAMQ(creds['username'], creds['password'],
                               creds['producer'], creds['topic'],
                               [(host, port)])
                data = []
                for doc in results:
                    hid = doc.get("hash", 1)
                    if '_id' in doc:
                        del doc['_id']  # delete ObjectID from MongoDB
                    data.append(amq.make_notification(doc, hid))
                results = amq.send(data)
                print("### results from AMQ %s" % len(results))
        else:
            if isinstance(results, list):
                print("### number of results %s" % len(results))
                for doc in results:
                    if '_id' in doc:
                        del doc['_id']  # delete ObjectID from MongoDB
                    try:
                        print(json.dumps(doc))
                    except (TypeError, ValueError):
                        # document is not JSON serializable, print as is
                        print(doc)
            else:
                print(results)
Beispiel #6
0
def make_hdfs_path(hdir, trange):
    """
    Build the list of HDFS paths to look at: one '<hdir>/<date>' entry
    for every date range_dates yields for the provided time range list.
    """
    paths = []
    for day in range_dates(trange):
        paths.append('%s/%s' % (hdir, day))
    return paths
Beispiel #7
0
def main():
    """
    Main function: parse command line options, build the list of HDFS
    directories for the requested time range, call run() and dispatch
    its results.

    When the scripts option is set the function calls scripts() and exits
    with status 0. The spec option may be either a path to a JSON file or
    a JSON string; the time range is read from the 'timerange' entry found
    under its 'spec' key and defaults to [yesterday, today] expressed as
    YYYYMMDD integers. A spec which can not be read or parsed is ignored
    (reported only in verbose mode) and the default time range is used.

    Results are sent through postdata() when the store option is given and
    through StompAMQ when the amq option is given; otherwise only the
    number of results is printed.
    """
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    time0 = time.time()

    if opts.scripts:
        scripts()
        sys.exit(0)

    # sample the clock once so both ends of the default range derive from
    # the same instant (two separate calls may straddle midnight)
    today = datetime.datetime.today()
    todate = int(today.strftime("%Y%m%d"))
    fromdate = int((today - datetime.timedelta(days=1)).strftime("%Y%m%d"))

    spec = {}
    if opts.spec:
        try:
            if os.path.isfile(opts.spec):
                # context manager guarantees the file handle is closed
                with open(opts.spec) as istream:
                    spec = json.load(istream)
            else:
                spec = json.loads(opts.spec)
        except (IOError, OSError, TypeError, ValueError) as exp:
            # best effort: fall back to the default time range, but do not
            # hide the reason from a user who asked for verbose output
            if opts.verbose:
                print("### Unable to load spec %s: %s" % (opts.spec, exp))
    timerange = spec.get('spec', {}).get('timerange', [fromdate, todate])

    if opts.hdir == HDIR:
        hdir = opts.hdir.split()
        if len(hdir) == 1:
            # single default directory: expand it into one sub-directory
            # per date, skipping dates already present in the path
            base = hdir[0]
            hdir = [os.path.join(base, tval)
                    for tval in range_dates(timerange) if tval not in base]
    else:
        hdir = opts.hdir
    results = run(opts.schema, hdir, opts.script, opts.spec, opts.verbose,
                  opts.rout, opts.yarn)
    if opts.store:
        data = {
            "results": results,
            "ts": time.time(),
            "etime": time.time() - time0
        }
        # the hash is computed before 'wmaid' and 'dtype' are added
        data['wmaid'] = opts.wmaid if opts.wmaid else wmaHash(data)
        data['dtype'] = 'job'
        pdata = dict(job=data)
        postdata(opts.store, pdata, opts.ckey, opts.cert, opts.verbose)
    elif opts.amq:
        creds = credentials(opts.amq)
        # dereference the credentials only after checking they exist
        if creds and StompAMQ:
            host, port = creds['host_and_ports'].split(':')
            port = int(port)
            print("### Send %s docs via StompAMQ" % len(results))
            amq = StompAMQ(creds['username'], creds['password'],
                           creds['producer'], creds['topic'], [(host, port)])
            data = []
            for doc in results:
                hid = doc.get("hash", 1)
                if '_id' in doc:
                    del doc['_id']  # delete ObjectID from MongoDB
                data.append(amq.make_notification(doc, hid))
            results = amq.send(data)
            print("### results from AMQ", len(results))
    else:
        print("### agg. results", len(results))
Beispiel #8
0
def make_hdfs_path(hdir, trange):
    """
    Return the HDFS paths to look at for the provided main hdfs dir and
    time range list, one path per date returned by range_dates.
    """
    def under_hdir(day):
        "Join the main hdfs dir and a single date with a slash"
        return '%s/%s' % (hdir, day)

    return list(map(under_hdir, range_dates(trange)))