def _generator4(path):
    """
    Args:
        path: path of the dataframe catalog
    Yields:
        (x, y) pairs of preprocessed images and GHI targets
    """
    args = init_args()
    catalog = load_catalog(path)

    def preprocess(x, y=None):
        zero = False
        if not np.any(x):
            zero = True
        img = (x - avg_x) / std_x
        return img, y, zero

    unique_paths = pd.unique(catalog['hdf5_8bit_path'].values.ravel())
    for path in tqdm(unique_paths):
        if path == "nan":
            continue
        grouped = catalog[path == catalog.hdf5_8bit_path]
        try:
            samples = load_numpy(path)
        except Exception:
            continue
        for station_i in args.station_data.keys():
            sample = samples[station_i]
            for idx, row in grouped.iterrows():
                if row[[station_i + "_GHI"]].isnull()[0]:
                    continue
                if not row[[station_i + "_DAYTIME"]][0]:
                    continue
                offset_idx = row['hdf5_8bit_offset']
                # Catalog rows at T0, T+1h, T+3h and T+6h (the 4 prediction horizons).
                timedelta_rows = [
                    grouped[grouped.index == (idx + datetime.timedelta(hours=h))]
                    for h in [0, 1, 3, 6]
                ]
                if timedelta_rows[-1].empty:
                    continue
                try:
                    GHIs = [r[station_i + "_GHI"].values[0] for r in timedelta_rows]
                except Exception:
                    print(timedelta_rows)
                    continue
                CS_GHIs = [
                    r[station_i + "_CLEARSKY_GHI"].values[0] for r in timedelta_rows
                ]
                # Target: deviation of observed GHI from the clear-sky estimate.
                y = np.array(CS_GHIs) - np.array(GHIs)
                if np.isnan(np.sum(y)):
                    continue
                # Reorder the raw (C, H, W) array to channels-last (H, W, C).
                x = sample[offset_idx].swapaxes(0, 1).swapaxes(1, 2)
                x = preprocess(x)[0]
                yield x, y
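# A standalone sketch (not part of the pipeline) of how the 4-horizon target
# in _generator4 is assembled. The toy catalog and the "BND" column names are
# hypothetical stand-ins for the real catalog produced by load_catalog.
def _demo_target_construction():
    import datetime
    import numpy as np
    import pandas as pd

    index = pd.date_range("2015-06-01 12:00", periods=7, freq="H")
    catalog = pd.DataFrame({
        "BND_GHI": [450., 500., 520., 510., 480., 430., 360.],
        "BND_CLEARSKY_GHI": [600., 640., 650., 640., 600., 540., 460.],
    }, index=index)

    idx = catalog.index[0]
    rows = [catalog[catalog.index == idx + datetime.timedelta(hours=h)]
            for h in [0, 1, 3, 6]]
    GHIs = [r["BND_GHI"].values[0] for r in rows]
    CS_GHIs = [r["BND_CLEARSKY_GHI"].values[0] for r in rows]
    y = np.array(CS_GHIs) - np.array(GHIs)  # -> [150. 140. 130. 100.]
    return y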
def _generator(path):
    """
    Args:
        path: path of the dataframe catalog
    Yields:
        (x, y) pairs of preprocessed images and GHI targets
    """
    args = init_args()
    catalog = load_catalog(path)

    def preprocess(x, y):
        zero = False
        if not np.any(x):
            zero = True
        img = (x - avg_x) / std_x
        return img, y, zero

    unique_paths = pd.unique(catalog['hdf5_8bit_path'].values.ravel())
    zero_img_count = 0
    np.random.shuffle(unique_paths)
    for path in unique_paths:
        try:
            samples = load_numpy(path)
        except Exception:
            continue
        grouped = catalog[path == catalog.hdf5_8bit_path]
        for station in args.station_data.keys():
            df = grouped[grouped.station == station]
            argsort = np.argsort(df['hdf5_8bit_offset'].values)
            offsets_0 = df['hdf5_8bit_offset'].values[argsort]
            GHIs_0 = df[df.hdf5_8bit_offset.isin(offsets_0)].GHI.values
            CS_GHI_0 = df[df.hdf5_8bit_offset.isin(offsets_0)].CLEARSKY_GHI.values
            y_0 = CS_GHI_0 - GHIs_0
            sample = samples[station]
            for i in range(offsets_0.shape[0]):
                x = sample[i].swapaxes(0, 1).swapaxes(1, 2)
                y = y_0[i:i + 1]
                x, y, z = preprocess(x, y)
                if z:
                    # Skip all-zero (missing) images, logging the count periodically.
                    zero_img_count += 1
                    if zero_img_count % 10000 == 0:
                        print("Zero img count:", zero_img_count)
                    continue
                yield x, y
    print("Zero img count:", zero_img_count)
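# `avg_x` and `std_x` used by preprocess are module-level normalization
# statistics whose computation is not shown in this file. A minimal sketch of
# one way to obtain them, assuming a (N, H, W, C) channels-last stack of
# training images (the function and argument names are hypothetical):
def _demo_normalization_stats(train_images):
    import numpy as np
    avg_x = np.mean(train_images, axis=(0, 1, 2))        # per-channel mean, shape (C,)
    std_x = np.std(train_images, axis=(0, 1, 2)) + 1e-8  # guard against division by zero
    return avg_x, std_x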
def _generator3(path):
    """
    Args:
        path: path of the dataframe catalog
    Yields:
        (x, y) pairs of preprocessed images and GHI targets
    """
    args = init_args()
    catalog = load_catalog(path)

    def preprocess(x, y=None):
        zero = False
        if not np.any(x):
            zero = True
        img = (x - avg_x) / std_x
        return img, y, zero

    # Walk the catalog in chunks of 200 rows.
    for index in tqdm(range(0, len(catalog), 200)):
        rows = catalog[index:index + 200]
        for idx, row in rows.iterrows():
            if row.ncdf_path == "nan":
                continue
            samples = load_numpy(row['hdf5_8bit_path'])
            offset_idx = row['hdf5_8bit_offset']
            # Catalog rows at T0, T+1h, T+3h and T+6h (the 4 prediction horizons).
            timedelta_rows = [
                catalog[catalog.index == (idx + datetime.timedelta(hours=h))]
                for h in [0, 1, 3, 6]
            ]
            for station_i in args.station_data.keys():
                sample = samples[station_i]
                if row[[station_i + "_GHI"]].isnull()[0]:
                    continue
                if row[[station_i + "_DAYTIME"]][0] == 0:
                    continue
                GHIs = [r[station_i + "_GHI"].values[0] for r in timedelta_rows]
                CS_GHIs = [
                    r[station_i + "_CLEARSKY_GHI"].values[0] for r in timedelta_rows
                ]
                y = np.array(CS_GHIs) - np.array(GHIs)
                if np.isnan(np.sum(y)):
                    continue
                x = sample[offset_idx].swapaxes(0, 1).swapaxes(1, 2)
                x = preprocess(x)[0]
                yield x, y
def _generator2(path):
    """
    Args:
        path: path of the dataframe catalog
    Yields:
        (x, y) pairs of preprocessed image sequences and GHI target sequences
    """
    args = init_args()
    catalog = load_catalog(path)

    def preprocess(x):
        zero = False
        if not np.any(x):
            zero = True
        img = (x - avg_x) / std_x
        return img, zero

    print("starting generator again...")
    unique_paths = pd.unique(catalog['hdf5_8bit_path'].values.ravel())
    zero_img_count = 0
    k_sequences = 0  # number of past images per example, in addition to T0
    GHI_sequence_steps = [4, 8, 12]  # future GHI offsets, in addition to T0
    GHI_sequence_steps_reverse = [24, 20, 12, 0]
    img_sequence_step = 2
    np.random.shuffle(unique_paths)
    for path in unique_paths:
        try:
            samples = load_numpy(path)
        except Exception:
            continue
        X = []
        Y = []
        grouped = catalog[path == catalog.hdf5_8bit_path]
        for station in args.station_data.keys():
            df = grouped[grouped.station == station]
            argsort = np.argsort(df['hdf5_8bit_offset'].values)
            offsets_0 = df['hdf5_8bit_offset'].values[argsort]

            # Keep only offsets whose full image sequence exists (see the
            # standalone sketch after this function).
            matching_offsets_imgs = offsets_0
            for i in range(k_sequences):
                matching_offsets_imgs = np.intersect1d(
                    matching_offsets_imgs,
                    matching_offsets_imgs + img_sequence_step)

            # Likewise for the GHI target sequence.
            matching_offsets_GHIs = matching_offsets_imgs
            for GHI_sequence_step in GHI_sequence_steps:
                matching_offsets_GHIs = np.intersect1d(
                    matching_offsets_GHIs,
                    matching_offsets_GHIs + GHI_sequence_step)

            GHI_pairs_list = []
            CS_GHI_pairs_list = []
            y_pairs_list = []
            for GHI_sequence_step in GHI_sequence_steps_reverse:
                GHI_vals = df[df.hdf5_8bit_offset.isin(
                    matching_offsets_GHIs - GHI_sequence_step)].GHI.values
                CS_GHI_vals = df[df.hdf5_8bit_offset.isin(
                    matching_offsets_GHIs - GHI_sequence_step)].CLEARSKY_GHI.values
                GHI_pairs_list.append(GHI_vals)
                CS_GHI_pairs_list.append(CS_GHI_vals)
                y_pairs_list.append(CS_GHI_vals - GHI_vals)
            GHI_pairs = zip(*GHI_pairs_list)
            CS_GHI_pairs = zip(*CS_GHI_pairs_list)
            y_pairs = zip(*y_pairs_list)

            # Image offsets from oldest to newest, ending at T0.
            offsets_pairs_list = [
                matching_offsets_imgs - (k_sequences - i) * img_sequence_step
                for i in range(k_sequences)
            ]
            offsets_pairs_list.append(matching_offsets_imgs)
            offset_pairs = zip(*offsets_pairs_list)

            example_pair = zip(offset_pairs, CS_GHI_pairs, GHI_pairs, y_pairs)
            sample = samples[station]
            for offsets, CS_GHIs, GHIs, ys in example_pair:
                imgs = []
                for offset in offsets:
                    img = sample[offset].swapaxes(0, 1).swapaxes(1, 2)
                    img, status = preprocess(img)
                    if status:
                        zero_img_count += 1
                    imgs.append(img)
                if k_sequences == 0:
                    imgs = imgs[0]
                X.append(imgs)
                Y.append(ys)
        # Shuffle X and Y in unison so image/target pairs stay aligned
        # (`shuffle` is sklearn.utils.shuffle).
        X, Y = shuffle(X, Y, random_state=0)
        for i, j in zip(X, Y):
            yield i, j
    print("Zero img count:", zero_img_count)
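# A standalone illustration (toy offsets, hypothetical step) of the
# np.intersect1d trick used in _generator2: an offset t is kept only if the
# shifted offset t - step is also present, so every example has a complete
# sequence.
def _demo_offset_matching():
    import numpy as np
    offsets = np.array([0, 2, 4, 10, 12, 16])
    step = 2
    # t is in the result iff t is in offsets and t - step is in offsets.
    matching = np.intersect1d(offsets, offsets + step)
    return matching  # -> array([ 2,  4, 12])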
tf.data.Dataset.from_generator(
    _generator2,
    args=([path]),
    output_types=(tf.float32, tf.float32),
    output_shapes=(
        # One (70, 70, 5) channels-last image and a 4-step GHI target.
        tf.TensorShape([70, 70, 5]),
        tf.TensorShape([4]),
    )).batch(args.batch_size).prefetch(tf.data.experimental.AUTOTUNE)


if __name__ == "__main__":
    args = init_args()
    for epoch in range(args.epochs):
        print("EPOCH ", epoch)
        sdl_train = SimpleDataLoader(args, args.data_catalog_path)
        tm = tqdm(total=1000)  # R! from data loader's tqdm
        counter = 0
        for batch in sdl_train:
            tm.update(1)
            counter += 1
            if counter > 1000:
                break
def test_step(images, labels):
    predictions = model(images, training=False)
    v_loss = loss_object(labels, predictions)
    valid_loss(v_loss)
    print("\n ** Valid Loss: ", v_loss.numpy())
    # Show a few prediction/label pairs for a quick sanity check.
    for i in range(10):
        print(predictions[i].numpy(), labels[i].numpy())


if __name__ == "__main__":
    wandb.init(project="project1-meta")
    args = config.init_args()
    print(tf.__version__)

    # Create an instance of the model.
    model = Resnet_Meta(args)

    loss_object = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    valid_loss = tf.keras.metrics.Mean(name='valid_loss')

    wandb.config.learning_rate = optimizer.get_config()['learning_rate']

    catalog_train = load_catalog(args.data_catalog_path)
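# The training step is not part of this excerpt. A minimal sketch, assuming
# the `model`, `loss_object`, `optimizer` and `train_loss` objects defined
# above, would follow the standard tf.GradientTape pattern:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)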
def data_loader_main():
    args = init_args()
    load_dataset_seq(args)