def loss(flow, predictions):
    flow = flow * 0.05
    losses = []
    INPUT_HEIGHT, INPUT_WIDTH = float(flow.shape[1].value), float(flow.shape[2].value)

    # L2 loss between predict_flow6, blob23 (weighted w/ 0.32)
    predict_flow6 = predictions['predict_flow6']
    size = [predict_flow6.shape[1], predict_flow6.shape[2]]
    downsampled_flow6 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow6, predict_flow6))

    # L2 loss between predict_flow5, blob28 (weighted w/ 0.08)
    predict_flow5 = predictions['predict_flow5']
    size = [predict_flow5.shape[1], predict_flow5.shape[2]]
    downsampled_flow5 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow5, predict_flow5))

    # L2 loss between predict_flow4, blob33 (weighted w/ 0.02)
    predict_flow4 = predictions['predict_flow4']
    size = [predict_flow4.shape[1], predict_flow4.shape[2]]
    downsampled_flow4 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow4, predict_flow4))

    # L2 loss between predict_flow3, blob38 (weighted w/ 0.01)
    predict_flow3 = predictions['predict_flow3']
    size = [predict_flow3.shape[1], predict_flow3.shape[2]]
    downsampled_flow3 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow3, predict_flow3))

    # L2 loss between predict_flow2, blob43 (weighted w/ 0.005)
    predict_flow2 = predictions['predict_flow2']
    size = [predict_flow2.shape[1], predict_flow2.shape[2]]
    downsampled_flow2 = downsample.downsample(flow, size)
    losses.append(average_endpoint_error(downsampled_flow2, predict_flow2))

    loss = tf.losses.compute_weighted_loss(losses, [0.32, 0.08, 0.02, 0.01, 0.005])

    # Return the 'total' loss: loss fns + regularization terms defined in the model
    return tf.losses.get_total_loss()
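# The loss above relies on an average_endpoint_error helper that is not shown
# here. A minimal sketch, assuming it returns the mean Euclidean (endpoint)
# distance between ground-truth and predicted flow vectors; the exact reduction
# used in the original project may differ.
import tensorflow as tf

def average_endpoint_error(labels, predictions):
    # labels, predictions: [batch, height, width, 2] flow fields
    squared_difference = tf.square(labels - predictions)
    # L2 norm over the two flow channels, then mean over batch and pixels
    endpoint_error = tf.sqrt(tf.reduce_sum(squared_difference, axis=-1))
    return tf.reduce_mean(endpoint_error)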
def test_001_t(self):
    src_data = (1, 2, 3, 4, 5, 6, 7, 8)
    expected_result = (1, 3, 5, 7)
    src = blocks.vector_source_f(src_data)
    dsample = downsample(2)
    snk = blocks.vector_sink_f()
    self.tb.connect(src, dsample)
    self.tb.connect(dsample, snk)
    self.tb.run()
    result_data = snk.data()
    self.assertFloatTuplesAlmostEqual(expected_result, result_data, 6)
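# For reference, the decimation behaviour the test above expects, sketched in
# plain Python (assumption: the downsample block keeps every factor-th sample,
# starting with the first one).
def expected_downsample(samples, factor):
    return tuple(samples[::factor])

# expected_downsample((1, 2, 3, 4, 5, 6, 7, 8), 2) == (1, 3, 5, 7)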
def loss(flow, predictions):
    losses = []

    # L2 loss between predict_disp6, blob23 (weighted w/ 0.32)
    predict_disp6 = predictions['disp6']
    size = [predict_disp6.shape[1], predict_disp6.shape[2]]
    downsampled_disp6 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp6, predict_disp6))

    # L2 loss between predict_disp5, blob28 (weighted w/ 0.16)
    predict_disp5 = predictions['disp5']
    size = [predict_disp5.shape[1], predict_disp5.shape[2]]
    downsampled_disp5 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp5, predict_disp5))

    # L2 loss between predict_disp4, blob33 (weighted w/ 0.08)
    predict_disp4 = predictions['disp4']
    size = [predict_disp4.shape[1], predict_disp4.shape[2]]
    downsampled_disp4 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp4, predict_disp4))

    # L2 loss between predict_disp3, blob38 (weighted w/ 0.04)
    predict_disp3 = predictions['disp3']
    size = [predict_disp3.shape[1], predict_disp3.shape[2]]
    downsampled_disp3 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp3, predict_disp3))

    # L2 loss between predict_disp2, blob43 (weighted w/ 0.02)
    predict_disp2 = predictions['disp2']
    size = [predict_disp2.shape[1], predict_disp2.shape[2]]
    downsampled_disp2 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp2, predict_disp2))

    # L2 loss between predict_disp1 (weighted w/ 0.01)
    predict_disp1 = predictions['disp1']
    size = [predict_disp1.shape[1], predict_disp1.shape[2]]
    downsampled_disp1 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp1, predict_disp1))

    # L2 loss between predict_disp0 (weighted w/ 0.005)
    predict_disp0 = predictions['disp0']
    size = [predict_disp0.shape[1], predict_disp0.shape[2]]
    downsampled_disp0 = downsample.downsample(flow, size)
    losses.append(tf.losses.mean_squared_error(downsampled_disp0, predict_disp0))

    # loss = tf.losses.compute_weighted_loss(losses, [0.005, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32])

    # Return the 'total' loss: loss fns + regularization terms defined in the model
    return (losses[0] * 0.32 + losses[1] * 0.16 + losses[2] * 0.08 +
            losses[3] * 0.04 + losses[4] * 0.02 + losses[5] * 0.01 +
            losses[6] * 0.005)
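# Sketch of the downsample.downsample(tensor, size) helper used by the flow and
# disparity losses above, assuming it simply resizes the ground-truth field to
# the spatial size of the corresponding prediction. Nearest-neighbour resizing
# is shown; the original project may use area/average pooling instead.
import tensorflow as tf

def downsample(tensor, size):
    # tensor: [batch, height, width, channels]; size: [new_height, new_width]
    return tf.image.resize_nearest_neighbor(tensor, size)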
def clean(folder_name):
    print('-----------------')
    print('Digesting: {}'.format(folder_name))

    print('Merging all CSV files in one...')
    os.system('cat {}/emojis_raw/{}/* > {}/clean_emojis/{}_unified.csv'.format(
        base_path, folder_name, base_path, folder_name))

    print('Removing headers...')
    remove = '"username","date","retweets","favorites","text","geo","mentions","hashtags","id","permalink","emoji"'
    os.system("awk '!/{}/' {}/clean_emojis/{}_unified.csv > temp && mv temp {}/clean_emojis/{}_no_header.csv".format(
        remove, base_path, folder_name, base_path, folder_name))

    date_dict = {}
    for date in date_array:
        date_dict[date] = 0

    print('Digesting...')
    with open('{}/clean_emojis/{}_no_header.csv'.format(base_path, folder_name)) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in tqdm(csv_reader):
            try:
                datetime_object = datetime.datetime.strptime(
                    row[1], '%Y-%m-%d %H:%M').replace(hour=0, minute=0)
                date_dict[datetime_object] += 1
            except (ValueError, IndexError, KeyError):
                pass
            line_count += 1

    print('Deleting intermediate files...')
    os.system('rm {}/clean_emojis/{}_unified.csv'.format(base_path, folder_name))

    print("Writing CSV...")
    with open('{}/emojis_3600/{}.csv'.format(base_path, folder_name), mode='w') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['day', 'usage'])
        for date in date_array:
            writer.writerow([date.strftime("%Y-%m-%d"), date_dict[date]])

    downsample('{}/emojis_3600/{}.csv'.format(base_path, folder_name),
               '{}/emojis_50/{}.csv'.format(base_path, folder_name),
               downsample_factor)
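# Hypothetical sketch of the downsample(in_path, out_path, factor) helper called
# at the end of clean(), assuming it keeps every factor-th data row of the daily
# usage CSV while preserving the header; the real helper may aggregate rows instead.
import csv

def downsample(in_path, out_path, factor):
    with open(in_path) as fin, open(out_path, mode='w') as fout:
        reader = csv.reader(fin)
        writer = csv.writer(fout)
        writer.writerow(next(reader))      # copy the header row unchanged
        for i, row in enumerate(reader):
            if i % factor == 0:            # keep every factor-th data row
                writer.writerow(row)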
def test_balance_set():
    """
    Given the labels of a training set, balance_set determines how balanced
    the set is and then downsamples to a specified proportion.

    Input
    -----
    fraction_1s: float
        fraction of ones we want
    fraction_0s: float
        fraction of zeros we want

    Output
    ------
    labels: ls
        index of labels that have the proper proportion to downsample
    """
    config = sett.SetContainer(test_run_config, model_config)
    # data = get_feature_table(config.tablename)
    data = pd.read_csv(test_csv).set_index('block_year')
    break_window = '2Year'
    pX_train, Y_train, pX_valid, Y_valid, pX_test, Y_test, date_dic = train_valid_test_split(
        data, break_window, config.static_features,
        config.cv_cuts['thirty_seventy'], config.past, config.future, past_yr=4)

    ls_balance = [[0.3, 0.7], [0.2, 0.8], [0.1, 0.9], [0.5, 0.5]]
    for balance in ls_balance:
        break_bal, nobreak_bal = balance
        X_bal, Y_bal = downsample(pX_train, Y_train,
                                  downsample_balance=balance, Verbose=True)
        print 'Y_train: ', np.sum(Y_train == 1)
        print check_balance(Y_train)
        balance_after = check_balance(Y_bal)
        assert np.isclose(break_bal, balance_after['break'],
                          atol=1e-4), '{} {}'.format(break_bal, balance_after['break'])
        assert np.isclose(nobreak_bal, balance_after['no_break'], atol=1e-4)
        print balance_after
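# Illustrative sketch of the downsample() rebalancing helper exercised by the
# test above, assuming it keeps every positive (break) example and subsamples
# the negative class so the label fractions match
# downsample_balance = [fraction_1s, fraction_0s]. The real helper may subsample differently.
import numpy as np

def downsample(X, Y, downsample_balance=(0.5, 0.5), Verbose=False):
    frac_ones, frac_zeros = downsample_balance
    ones = np.where(Y == 1)[0]
    zeros = np.where(Y == 0)[0]
    # number of negatives needed so that ones / (ones + zeros) == frac_ones
    n_zeros = int(round(len(ones) * frac_zeros / frac_ones))
    keep = np.concatenate([ones, np.random.choice(zeros, n_zeros, replace=False)])
    if Verbose:
        print(len(keep))
    return X.iloc[keep], Y.iloc[keep]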
for fname in os.listdir(directory):
    # print fname
    t1, v1 = read_scope.read_scope(directory + fname)

    # Shaping filter
    v2 = lpf.lpfFirstOrder(v1, TAU, 10)  # shaping, 10GSPS
    t2 = t1

    # Simulate antialiasing filter (bessel)
    b, a = scipy.signal.bessel(NFO, FBK / (10000. / 2.), 'low')
    # v2 = scipy.signal.filtfilt(b, a, v1)
    v2 = scipy.signal.filtfilt(b, a, v2)
    t2 = t1

    # Downsample
    t3, v3 = downsample.downsample(t2, v2, 10. / FGSPS)  # 250MSPS

    # Discriminator
    found, tddc = ddc.disc_neg(t3[10:], v3[10:])

    if (found):
        plt.plot(t1, v1)  # Full BW, 10GSPS
        # plt.plot(t2,v2,'.-')  # LPF to simulate front end

        # Downsample
        t3, v3 = downsample.downsample(t2, v2, 10. / FGSPS)  # 250MSPS
        plt.plot(t3, v3, '.-')  # LPF to simulate front end

        # Boxcar
        t4, v4 = boxcar.boxcar(t3, v3, NAVG1)
        plt.plot(t4, v4, '.-')
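# Sketch of the downsample.downsample(t, v, dsf) helper used by the waveform
# scripts above and below, assuming it keeps every dsf-th (time, sample) pair;
# the real module may average blocks of samples instead of decimating.
def downsample(t, v, dsf=1.0):
    step = max(1, int(round(dsf)))
    return t[::step], v[::step]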
ymin = np.min(y)
yscale = [yi / ymin for yi in y]
if (i == 0):
    yavg = np.zeros(len(y))
    xavg = copy.copy(x)
yavg = [(yavgi * i + yscalei) / (i + 1)
        for yavgi, yscalei in zip(yavg, yscale)]
# print len(xavg),len(yavg)

# 1.) Low pass filter
y1 = lpf.lpfFirstOrder(y, TAU, SCOPE_FSPS)
peak_1.append(np.min(y1))

# 2.) Downsample
x2, y2 = downsample.downsample(x, y1, dsf=SCOPE_FSPS / DIG_FSPS)
pk = np.min(y2)
peak_2.append(pk)
q = trap_int.trap_int(x2, y2, 0, len(x2) - 1)
charge_2.append(q / (50. * 1.6 * 10**-19))

# 3.) CFD with no interpolation
xprev = 0.
yprev = 0.
for x2i, y2i in zip(x2, y2):
    if y2i < 0.5 * pk:
        time_cfd.append(x2i)
        # tinterp = xprev + ((x2i-xprev)/(y2i-yprev))*0.5*pk
        # time_cfd_interp.append(tinterp)
    xprev = x2i
    yprev = y2i
def least_squares_fit(filename, variable1, variable2):
    with tables.openFile(filename, 'r') as data:
        # fetch values for variable 1 and 2
        variable_1 = data.root.correlation.table.col('variable1')
        variable_2 = data.root.correlation.table.col('variable2')

    y_axis = query_yes_no("Do you want to plot %s on the y-axis?" % variable1[0][0])

    if len(variable_1.shape) != 1:
        print 'There are %d plates with an individual %s value.' % (variable_1.shape[1], variable1[0][0])
        plate_number1 = int(question.digit_plate("Enter the plate number that you want to use in your correlation analysis ( e.g. '1' ): ", variable_1.shape[1]))
        variable_1 = variable_1[:, plate_number1 - 1]

    if len(variable_2.shape) != 1:
        print 'There are %d plates with an individual %s value.' % (variable_2.shape[1], variable2[0][0])
        plate_number2 = int(question.digit_plate("Enter the plate number that you want to use in your correlation analysis ( e.g. '1' ): ", variable_2.shape[1]))
        variable_2 = variable_2[:, plate_number2 - 1]

    if y_axis == True:
        y = variable_1  # e.g. 'event_rates'
        x = variable_2  # e.g. 'barometric pressure'
        x, y = lose_nans(x, y)
    elif y_axis == False:
        x = variable_1  # e.g. 'event_rates'
        y = variable_2  # e.g. 'barometric pressure'
    else:
        print 'weird'

    del variable_1, variable_2

    # Apply a linear least square fit:
    # a line, ``y = mx + c``, through the data-points:
    # We can rewrite the line equation as ``y = Ap``, where ``A = [[x 1]]``
    # and ``p = [[m], [c]]``. Now use `lstsq` to solve for `p`:
    A = np.vstack([x, np.ones(len(x))]).T
    a, b = np.linalg.lstsq(A, y)[0]
    del A

    if y_axis == True:
        print ''
        print "The equation for the linear fit line is: ( y = a * x + b ) y = " + str(a) + " * x + " + str(b)
        print ''
        print "or '" + variable1[0][0] + "' = " + str(a) + " * '" + variable2[0][0] + "' + " + str(b)
    elif y_axis == False:
        print ''
        print "The equation for the linear fit line is: ( y = a * x + b ) y = " + str(a) + " * x + " + str(b)
        print ''
        print "or '" + variable2[0][0] + "' = " + str(a) + " * '" + variable1[0][0] + "' + " + str(b)

    # Calculate sample pearson correlation coefficient
    cor_coef = np.corrcoef([x, y])[0, 1]
    absolute_cor_coef = abs(cor_coef)

    print ''
    pearson_text = "The Pearson correlation coefficient between '%s' and '%s' is: %s" % (variable1[0][0], variable2[0][0], str(cor_coef))
    print pearson_text
    print ''

    if absolute_cor_coef < 0.1:
        correlation = 'NO'
    elif 0.1 <= absolute_cor_coef <= 0.3:
        correlation = 'a SMALL'
    elif 0.3 <= absolute_cor_coef <= 0.5:
        correlation = 'a MEDIUM'
    elif 0.5 <= absolute_cor_coef <= 1:
        correlation = 'a STRONG'
    else:
        correlation = ''

    if cor_coef >= 0.1:
        pos_neg = ' POSITIVE'
    elif cor_coef <= -0.1:
        pos_neg = ' NEGATIVE'
    else:
        pos_neg = ''

    conclusion = "For this sample you have found %s%s correlation between '%s' and '%s'." \
        % (correlation, pos_neg, variable1[0][0], variable2[0][0])
    print conclusion

    """
    # calculate chi squared
    list_exp = array([a*i + b for i in x])
    begin3 = datetime.now()
    chi2, p = chisquare(y,list_exp)
    end3 = datetime.now()
    print end3 - begin3
    combo = zip(y,list_exp)
    begin = datetime.now()
    ch2 = 0
    for i in combo:
        ch2 = ch2 + (i[0]-i[1]-0.5)**2/i[1]
    print 'chi squared is ', ch2
    end = datetime.now()
    print end - begin
    print ''
    print 'chi squared:', chi2
    print 'associated p-value: ', p
    print ''
    degrees_of_freedom = (len(x) - 1)
    print 'chi squared divided by the number of measurements: ', chi2/degrees_of_freedom
    chi2_prob = chisqprob(chi2,degrees_of_freedom)  # probability value associated with the provided chi-square value and degrees of freedom
    print 'probability value associated with the provided chi-square value and degrees of freedom:', chi2_prob
    """

    # Plot the data along with the fitted line:
    if (len(x) > 500000):
        x, y = downsample(x, y)
    plt.plot(x, y, 'o', label='Original data', markersize=1)
    plt.plot(x, a * x + b, 'r', label='Fitted line')

    if y_axis == True:
        plt.ylabel(variable1[0][0] + ' (' + units[variable1[0][0]] + ')')
        plt.xlabel(variable2[0][0] + ' (' + units[variable2[0][0]] + ')')
    elif y_axis == False:
        plt.ylabel(variable2[0][0] + ' (' + units[variable2[0][0]] + ')')
        plt.xlabel(variable1[0][0] + ' (' + units[variable1[0][0]] + ')')

    tit = "Fit line: ( y = ax + b ) y = " + str(a) + " * x + " + str(b)
    plt.legend()
    plt.title(tit)

    start_date_interval, stop_date_interval = get_date_interval_from_file_names(variable1, variable2)
    inter_filename = filename.replace('.h5', '')
    fname = inter_filename + ' ' + start_date_interval + '_' + stop_date_interval
    plt.savefig(fname + ".png")
    plt.show()

    fit_info = open(fname + '.txt', 'w')
    fit_info.write(tit)
    fit_info.write("%s\n" % (''))
    fit_info.write(str(pearson_text))
    fit_info.write("%s\n" % (''))
    fit_info.write(str(conclusion))
    fit_info.close()

    """
    # calculate mean y value
    mean_y = sum(y) / len(y)
    print 'mean_y = ', mean_y
    relative_deviation_from_mean_y_list = []  # relative deviation of the cosmic ray intensity (deltaI/I) from the mean intensity.
    for i in range(len(y)):
        deviation_of_mean_y = y[i] - mean_y
        relative_deviation_from_mean_y = deviation_of_mean_y/mean_y
        relative_deviation_from_mean_y_list.append(relative_deviation_from_mean_y)
    plt.plot(x,relative_deviation_from_mean_y_list,'o',markersize=1)
    plt.ylabel('deltaMPV_p/<MPV_p>')
    plt.xlabel('Outside temperature (degrees Celsius)')
    tit = "Correlation between the Relative deviation of the MPV of the pulseheight (3h intervals) from the mean MPV value with the outside temperature."
    plt.title(tit)
    fname = 'Correlation between relative deviation of the MPV of the pulseheights (3h intervals) from the mean MPV value with T_out'
    plt.savefig(fname +".png")
    plt.show()
    """
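# Sketch of the downsample(x, y) call used before plotting above, assuming it
# simply thins the arrays to a manageable number of points for matplotlib; the
# max_points value here is illustrative.
import numpy as np

def downsample(x, y, max_points=500000):
    step = max(1, len(x) // max_points)
    return np.asarray(x)[::step], np.asarray(y)[::step]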
def verify(gsmall, gbig, atlas):
    gbig = downsample(gbig, atlas=atlas)  # gbig has been downsampled
    assert gbig.get_adjacency() == gsmall.get_adjacency(), "Adjacency matrices unequal!"
    assert gbig.es["weight"] == gsmall.es["weight"], "Adjacency matrix weights unequal!"
# Fri Dec 21 16:46:58 EST 2018
import sys
sys.path.append('../')
import numpy as np
import matplotlib.pyplot as plt
import downsample

for i in range(8010):
    fin = open('/media/tyler/Seagate Expansion Drive/20181220_watchman_spe_filter/l3/%05d.txt' % i)
    print i
    x = []
    y = []
    for line in fin:
        x.append(float(line.split(',')[0]))
        y.append(float(line.split(',')[1]))
    fin.close()

    # 1.) Downsample
    x1, y1 = downsample.downsample(x, y, dsf=40)

    plt.plot(x, y)
    plt.plot(x1, y1, 'o')
    plt.ylim(-0.0250, 0.005)
    plt.show()
def run_pipeline(run_config):
    """
    Main function for running through the pipeline.

    Reads the model config and run yaml file. run_pipeline does the following:
    - Load the configuration file
    - Load the data from the features table
    - Loop through break windows and past_yrs
    - In the loop, implement a cross-validation strategy that is either
      'seventy_thirty' or 'no_overlap'
    - In the loop, run through models and parameters.

    **If the debug flag is True then there will be no output to the DB.**
    **The writeToDB must be True to write to the DB**

    Input
    -----
    run_config: yaml object
        run configuration for doing a run.
    """
    config = sett.SetContainer(run_config, model_config)
    data = get_feature_table(config.tablename)

    for break_window in config.break_windows:
        print break_window
        for _past_yr in config.past_years:
            print 'past_yr', _past_yr

            if config.cross_valname == 'seventy_thirty':
                pX_train, _Y_train, pX_valid, Y_valid, pX_test, Y_test, dic_year = \
                    train_valid_test_split(
                        data, break_window, config.static_features,
                        config.cv_cuts['thirty_seventy'], config.past,
                        config.future, past_yr=_past_yr, config=config)
                X_train = dumify_categorical_features(pX_train)
                X_valid = dumify_categorical_features(pX_valid)
                X_test = dumify_categorical_features(pX_test)
                # keep a reference so the downsampling branch below works for
                # both cross-validation strategies
                _X_train = X_train
                print dic_year
            elif config.cross_valname == 'no_overlap':
                _X_train, _Y_train, X_valid, Y_valid, X_test, Y_test, dic_year = \
                    CV_no_overlap(break_window, _past_yr, data, config)
            else:
                raise CVerror, 'no cross validation set'

            if config.downsample:
                for ls_dwn_smple in config.rebalancing:
                    X_train, Y_train = downsample(
                        _X_train, _Y_train, downsample_balance=ls_dwn_smple)

                    X_train_cols = X_train.columns.tolist()
                    X_valid_cols = X_valid.columns.tolist()
                    X_test_cols = X_test.columns.tolist()
                    assert set(X_train_cols) == set(X_valid_cols)
                    assert set(X_train_cols) == set(X_test_cols)
                    assert set(X_test_cols) == set(X_valid_cols)

                    run_models(config.clfs, config.visualize, break_window,
                               X_train, Y_train, X_valid, Y_valid,
                               config.cross_valname,
                               results_dir=config.results_dir,
                               dic_year=dic_year, config=config,
                               past_year=_past_yr)
            else:
                # case of no down sampling, then just rename
                X_train = _X_train
                Y_train = _Y_train
                run_models(config.clfs, config.visualize, break_window,
                           X_train, Y_train, X_valid, Y_valid,
                           config.cross_valname,
                           results_dir=config.results_dir,
                           dic_year=dic_year, config=config,
                           past_year=_past_yr)
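# Illustrative sketch of the dumify_categorical_features helper called in
# run_pipeline above, assuming it one-hot encodes the categorical/object
# columns of a feature frame; the real implementation may select columns explicitly.
import pandas as pd

def dumify_categorical_features(df):
    # pandas dummifies object/categorical columns and leaves numeric ones untouched
    return pd.get_dummies(df)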