Beispiel #1
0
def main():
    inputArgs = sys.argv
    args = inputArgs[1:]
    yearsTempList, magnitudeOver, overwrite = Input.getValues(args)
    years = Database.queryInput(yearsTempList, magnitudeOver, overwrite)
    StoreData.createFolder()
    print "Requesting earthquakes data with magnitude over {}, for years: {}".format(magnitudeOver, years)
    for year in years:
        print "Processing year: ", year
        print "Data acquisition starts"
        firstDate = date(year, 1, 1)
        lastDate = date(year, 12, 31)
        for d in dateRange(firstDate, lastDate):
            start = d.strftime("%Y-%m-%d") + "T00:00:00.000Z"
            end = (d + timedelta(days=1)).strftime("%Y-%m-%d") + "T00:00:00.000Z"
            try:
                eq_list_raw = Acquisition.request(start, end, magnitudeOver)
                eq_list_temp = Preprocessing.cleanHeaders(eq_list_raw)
                eq_list = Preprocessing.splitDateTime(eq_list_temp)
                StoreData.toFile(eq_list, year, d,magnitudeOver)
            except Exception as error:
                print "Error while processing a request:", error
        print "Data acquisition ended"
        path = HDFS.getPath()
        HDFS.put('../data/earthquakes{}mag{}.csv'.format(year, magnitudeOver), path)
Beispiel #2
0
def main():
    """Run the historical earthquake download and log every step.

    The storage folder is created first, then the arguments following the
    script name are parsed by ``Input.getValues`` into candidate years, a
    magnitude threshold and a "download again" flag. ``Database.QueryInput``
    decides which years are processed.

    For each date produced by ``dateRange`` between January 1 and December 31
    of a year, a one-day window is requested and passed through header
    cleaning, date/time splitting and the country check before being handed
    to ``StoreData.toFile``. An exception for a single day is logged as an
    error and the loop continues. After each year its CSV path is passed to
    ``HDFS.put`` with the path obtained once, up front, from
    ``HDFS.getPath``.
    """
    for banner_line in ('-----------------------',
                        'Download process starts',
                        '-----------------------'):
        Log.info(banner_line)

    StoreData.createFolder()
    year_candidates, min_magnitude, redownload = Input.getValues(sys.argv[1:])
    hdfs_target = HDFS.getPath()
    Log.info("Earthquakes acquisition starts..")
    selected_years = Database.QueryInput(year_candidates, min_magnitude,
                                         redownload)
    request_template = (
        "Requesting earthquakes data with magnitude over {}, for years: {}")
    Log.info(request_template.format(min_magnitude, selected_years))

    # Pieces of the timestamp sent with every request: calendar day + midnight.
    day_format = "%Y-%m-%d"
    midnight_suffix = "T00:00:00.000Z"

    for year in selected_years:
        Log.info("Processing year: {}".format(year))
        Log.info("Earthquakes acquisition starts.")
        year_start = date(year, 1, 1)
        year_end = date(year, 12, 31)
        for day in dateRange(year_start, year_end):
            next_day = day + timedelta(days=1)
            window_start = day.strftime(day_format) + midnight_suffix
            window_end = next_day.strftime(day_format) + midnight_suffix
            try:
                rows = Acquisition.Request(window_start, window_end,
                                           min_magnitude)
                rows = Preprocessing.cleanHeaders(rows)
                rows = Preprocessing.splitDateTime(rows)
                rows = Preprocessing.checkCountry(rows)
                StoreData.toFile(rows, year, day, min_magnitude)
            except Exception as error:
                # Best effort: a failing day is logged and skipped.
                Log.error("Error while processing a Request:")
                Log.error(error)
        Log.info("Earthquakes acquisition for  year {} finished".format(year))

        year_csv = '../../data/earthquakes-history/earthquakes{}mag{}.csv'.format(
            year, min_magnitude)
        HDFS.put(year_csv, hdfs_target)

    for banner_line in ('---------------------',
                        'Download process ends',
                        '---------------------'):
        Log.info(banner_line)
def main():
    StoreData.createFolder()
    try:
        interval = int(sys.argv[1])
        print "Interval parameter passed: ", interval
    except:
        interval = 10
        print "No interval parameter passed, interval default value ", interval
    end = datetime.utcnow() - timedelta(minutes=10)
    start = end - timedelta(minutes=interval)
    while True:
        print "Data acquisition starts"
        print "Requesting earthquakes data"
        print "from ", start
        print "to   ", end
        eq_list_raw = Acquisition.request(start, end)
        eq_list_temp = Preprocessing.cleanHeaders(eq_list_raw)
        eq_list = Preprocessing.splitDateTime(eq_list_temp)
        StoreData.toFile(eq_list)
        print "Data acquisition ended"
        print "Process starts again in {} minutes".format(interval)
        time.sleep(interval * 60)
        end = datetime.utcnow() - timedelta(minutes=10)
        start = end - timedelta(minutes=interval)