Exemple #1
0
out.writerow("predict R Dopt S Xn Yn elapsed".split())

matched = "all"

fields = 'type, tract, area, date'

if opts.areas != "all":
    print "Matching areas", opts.areas
    data = select(input, areas=opts.areas, fields=fields, start_date=start_date, end_date=end_date) #input[match_areas(input, opts.areas)]
    matched = opts.areas
elif opts.tracts != "all":
    print "Matching tracts", opts.tracts
    data = select(input, tracts=opts.tracts, fields=fields, start_date=start_date, end_date=end_date) #input[match_tracts(input, opts.tracts)]
    matched = opts.tracts
else:
    data = select(input, fields, start_date=start_date, end_date=end_date) #, input

with mytimer:
    R, Dopt = search_streams(data, np.array(streams))

Xn = np.sum(match_streams(data,Dopt))
Yn = np.sum(match_streams(data,[opts.predict]))
print "\n\n-------------------- In area", matched
print "Correlation = %.05f"%R
print "for predicting", opts.predict, "with these leading indicators:", ', '.join(Dopt)
print "# of events in X:", Xn
print "# of events in Y:", Yn
out.writerow([opts.predict, R, Dopt, matched, Xn, Yn, mytimer.elapsed])

print "search complete, output written to %s"%(opts.output)
fields = 'type, tract, area, date'

if opts.areas != "all":
    print "Matching areas", opts.areas
    data = select(input, areas=opts.areas, fields=fields, start_date=start_date, end_date=end_date) #input[match_areas(input, opts.areas)]
    matched = opts.areas
elif opts.tracts != "all":
    print "Matching tracts", opts.tracts
    data = select(input, tracts=opts.tracts, fields=fields, start_date=start_date, end_date=end_date) #input[match_tracts(input, opts.tracts)]
    matched = opts.tracts
else:
    for matched in range(1,78):
        data = select(input, areas=[matched], fields=fields, start_date=start_date, end_date=end_date) #, input

        with mytimer:
            R_a, Dopt = search_streams(data, np.array(streams), daterange=TIME_PERIOD_A)


        X = match_streams(data,Dopt)
        Y = match_streams(data,[opts.predict]) 
        Xn = np.sum(X)
        Yn = np.sum(Y)
        X_ts = time_series(data[X], daterange=TIME_PERIOD_B)
        Y_ts = time_series(data[Y],lag=Y_LAG, daterange=TIME_PERIOD_B)
        R_b = X_ts.corr(Y_ts)
        X_ts = time_series(data[X], daterange=daterange)
        Y_ts = time_series(data[Y],lag=Y_LAG, daterange=daterange)
        R = X_ts.corr(Y_ts)
        print "\n\n-------------------- In area", matched
        print "Correlation = %.05f"%R
        print "for predicting", opts.predict, "with these leading indicators:", ', '.join(Dopt)