import itertools
import os
import sys

import h5py
import numpy as np

import file_io
import lf_tools


def create_hdf5_container(path_i1, path_o, lf_name):
    # patch size (px, py), number of views, and patch stride (sx, sy)
    px = 48
    py = 48
    nviews = 9
    sx = 16
    sy = 16

    file = h5py.File(path_o + '/' + lf_name + '.hdf5', 'w')

    # read diffuse color
    LF = file_io.read_lightfield(path_i1)
    LF = LF.astype(np.float32)  # / 255.0

    cv_gt = lf_tools.cv(LF)
    lf_tools.save_image(path_o + '/' + lf_name, cv_gt)

    # maybe we need those, probably not.
    param_dict = file_io.read_parameters(path_i1)

    # block count: write out one individual light field
    cx = np.int32((LF.shape[3] - px) / sx) + 1
    cy = np.int32((LF.shape[2] - py) / sy) + 1

    dset_blocks = []
    for i, j in itertools.product(np.arange(0, nviews), np.arange(0, nviews)):
        dset_blocks.append(file.create_dataset('views%d%d' % (i, j),
                                               (cy, cx, 3, px, py),
                                               chunks=(1, 1, 3, px, py),
                                               maxshape=(None, None, 3, px, py)))
    # lists indexed in 2D
    dset_blocks = [dset_blocks[x:x + nviews]
                   for x in range(0, len(dset_blocks), nviews)]

    sys.stdout.write(lf_name + ': ')
    for bx in np.arange(0, cx):
        sys.stdout.write('.')
        sys.stdout.flush()

        for by in np.arange(0, cy):
            x = bx * sx
            y = by * sy  # step by the vertical stride sy

            # extract data; y indexes dim 2 (height), x indexes dim 3 (width),
            # and each dataset is laid out (cy, cx, 3, px, py), so the row
            # index by comes first
            for i, j in itertools.product(np.arange(0, nviews),
                                          np.arange(0, nviews)):
                dset_blocks[i][j][by, bx, :, :, :] = np.transpose(
                    np.array(LF[i, j, y:y + py, x:x + px, :]),
                    (-1, 0, 1)).reshape(3, px, py)

    sys.stdout.write(' Done.\n')
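# Usage sketch: a minimal driver for create_hdf5_container, assuming one
# scene per subdirectory in a layout that file_io.read_lightfield can parse.
# 'data_root' and 'out_root' are hypothetical placeholder paths, not part
# of the original script.
if __name__ == '__main__':
    data_root = '/data/lightfields'
    out_root = '/data/hdf5'
    for name in sorted(os.listdir(data_root)):
        scene_dir = os.path.join(data_root, name)
        if os.path.isdir(scene_dir):
            create_hdf5_container(scene_dir, out_root, name)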
import os

import file_io


def convert_to_hdf5(data_folder, tgt=None):
    if tgt is None:
        tgt = os.path.join(data_folder, "scene.h5")

    scene = dict()
    scene["LF"] = file_io.read_lightfield(data_folder)

    # ground truth is only available for non-test scenes
    params = file_io.read_parameters(data_folder)
    if params["category"] != "test":
        scene["disp_highres"] = file_io.read_disparity(data_folder, highres=True)
        scene["disp_lowres"] = file_io.read_disparity(data_folder, highres=False)
        scene["depth_highres"] = file_io.read_depth(data_folder, highres=True)
        scene["depth_lowres"] = file_io.read_depth(data_folder, highres=False)

    file_io.write_hdf5(scene, tgt)
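# Usage sketch for convert_to_hdf5; the scene path is a hypothetical
# placeholder. With tgt omitted, the output resolves to
# <data_folder>/scene.h5 as defined above.
if __name__ == '__main__':
    convert_to_hdf5('/data/scenes/cotton')  # writes /data/scenes/cotton/scene.h5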
import os

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

import file_io


def visualize_scene(data_folder):
    params = file_io.read_parameters(data_folder)
    if params["category"] == "test":
        print("Test scenes with hidden ground truth are not visualized.")
        return

    light_field = file_io.read_lightfield(data_folder)
    disp_map_highres = file_io.read_disparity(data_folder, highres=True)
    disp_map_lowres = file_io.read_disparity(data_folder, highres=False)
    depth_map_lowres = file_io.read_depth(data_folder, highres=False)

    rows, cols = 1, 4
    cb_shrink = 0.7
    fig = plt.figure(figsize=(20, 4))
    plt.suptitle("%s:%s (%s)" % (params["category"].title(),
                                 params["scene"].title(),
                                 "x".join(str(i) for i in np.shape(light_field))))

    plt.subplot(rows, cols, 1)
    plt.title("Center View")
    plt.imshow(light_field[4, 4, :, :, :])

    plt.subplot(rows, cols, 2)
    plt.title("Depth Map (%dx%d)" % np.shape(depth_map_lowres))
    cc = plt.imshow(depth_map_lowres, cmap=cm.viridis, interpolation="none")
    plt.colorbar(cc, shrink=cb_shrink)

    plt.subplot(rows, cols, 3)
    plt.title("Disparity Map (%dx%d)" % np.shape(disp_map_lowres))
    cc = plt.imshow(disp_map_lowres, cmap=cm.viridis, interpolation="none")
    plt.colorbar(cc, shrink=cb_shrink)

    plt.subplot(rows, cols, 4)
    plt.title("Disparity Map (%dx%d)" % np.shape(disp_map_highres))
    cc = plt.imshow(disp_map_highres, cmap=cm.viridis, interpolation="none")
    plt.colorbar(cc, shrink=cb_shrink)

    fig_name = os.path.join(data_folder, "scene.png")
    plt.savefig(fig_name, dpi=200, bbox_inches='tight')
    fig.clf()
    plt.close(fig)
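# Usage sketch: render one overview figure per scene directory under a
# hypothetical dataset root; test scenes are skipped inside visualize_scene.
if __name__ == '__main__':
    dataset_root = '/data/scenes'  # hypothetical placeholder path
    for name in sorted(os.listdir(dataset_root)):
        folder = os.path.join(dataset_root, name)
        if os.path.isdir(folder):
            visualize_scene(folder)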
import sys

import file_io

if len(sys.argv) < 2:
    print("Please include parameter file")
    sys.exit(1)

param_file = sys.argv[1]
subreddits = file_io.read_parameters(param_file)[1]
src = file_io.read_parameters(param_file)[6]
count = []

subreddits = sorted(x.lower() for x in subreddits)

for sub in subreddits:
    del_count = 0
    input_file = src + '/' + sub + '_' + src + '.json'
    data = file_io.read(input_file)['data']
    for datum in data:
        if datum['body'] == '[removed]':
            del_count += 1
    count.append("{:15s}{:10,d}\n".format(sub, del_count))

with open(src + '/' + src + '_rem_count.txt', 'w', encoding="utf8") as f:
    f.writelines(count)
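# Aside: the original condition read
#     datum['body'] == '[removed]' in datum['body']
# which Python chains as (a == '[removed]') and ('[removed]' in a), so it
# behaved like the plain equality test used above. A tiny self-check:
_body = '[removed]'
assert (_body == '[removed]' in _body) == (_body == '[removed]')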
# imean = 0.3
# factor = imean / np.mean(cv_gt)
# LF_diffuse = LF_diffuse * factor
# LF_specular = LF_specular * factor
# LF = np.add(LF_diffuse, LF_specular)
# cv_gt = lf_tools.cv(LF)

disp = file_io.read_disparity(data_folder)
disp_gt = np.array(disp[0])
disp_gt = np.flip(disp_gt, 0)

lf_tools.save_image(training_data_dir + 'input' + lf_name, cv_gt)

# maybe we need those, probably not.
param_dict = file_io.read_parameters(data_folder)

# write out one individual light field
# block count
cx = np.int32((LF.shape[3] - px) / sx) + 1
cy = np.int32((LF.shape[2] - py) / sy) + 1

for by in np.arange(0, cy):
    sys.stdout.write('.')
    sys.stdout.flush()

    for bx in np.arange(0, cx):
        x = bx * sx
        y = by * sy  # step by the vertical stride sy
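# Worked example of the block-count arithmetic above: for a hypothetical
# 512x512 light field with px = py = 48 and stride sx = sy = 16 (the values
# used elsewhere in this repo),
#   cx = (512 - 48) // 16 + 1 = 30
# and likewise cy = 30, i.e. a 30x30 grid of patches per view.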
import sys
import string
from collections import Counter

import csv
import nltk
import pandas as pd
from nltk.corpus import wordnet, stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

import file_io

if len(sys.argv) < 2:
    print("Please include parameter file and API file")
    sys.exit(1)

param_file = sys.argv[1]
subreddits = file_io.read_parameters(param_file)[1]
src, dst = file_io.read_parameters(param_file)[6:8]

# gather data on stopwords and symbols to be ignored
stop_words = set(stopwords.words('english'))
punctuation = list(string.punctuation)
lemmatizer = WordNetLemmatizer()
syn = file_io.load_dict('syn.txt')
slurs = file_io.load_slurs('slurs')

for sub in subreddits:
    sub_data = []
    word_count = {}
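    # A minimal sketch of the per-comment counting this loop presumably
    # performs (the rest of the file is not shown here), assuming the same
    # JSON layout ('data' -> 'body') as the other scripts in this repo;
    # the field names are assumptions:
    #
    #   input_file = src + '/' + sub.lower() + '_' + src + '.json'
    #   for datum in file_io.read(input_file)['data']:
    #       for token in word_tokenize(datum['body'].lower()):
    #           if token not in stop_words and token not in punctuation:
    #               lemma = lemmatizer.lemmatize(token)
    #               word_count[lemma] = word_count.get(lemma, 0) + 1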
import sys
import time

import Reddit
import file_io

if len(sys.argv) < 2:
    print("Please include parameter file")
    sys.exit(1)

param_file = sys.argv[1]
(method, subreddits, start, end, fields,
 k, src, dst, extend) = file_io.read_parameters(param_file)
program_start = int(time.time())

for sub in subreddits:
    sub_count = 0
    epoch = start
    output_file = dst + '/' + sub.lower() + '_' + dst + '.json'
    file_io.set_output_file(output_file)
    if extend:
        # resume from an earlier crawl stored in src
        epoch, sub_count = file_io.extract_data(sub, src)

    while True:
        # stop after k thousand comments (count mode), or once the scan,
        # which walks backwards in time, reaches the end timestamp (time mode)
        if (method == "count" and sub_count >= k * 1000) or \
           (method == "time" and epoch <= end):
            break
        epoch, count_curr = Reddit.request_data(sub, 500, fields, epoch,
                                                sub_count == 0)
        if count_curr == -1:
            break
        else:
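# For reference, the tuple returned by file_io.read_parameters is indexed
# consistently across these scripts (inferred from the unpacking above and
# the index slices used in the companion scripts):
#   [0] method   [1] subreddits   [2] start   [3] end   [4] fields
#   [5] k        [6] src          [7] dst     [8] extend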
import sys
import time
import json

import language
import file_io

if len(sys.argv) < 3:
    print("Please include parameter file and API file")
    sys.exit(1)

param_file = sys.argv[1]
key_file = sys.argv[2]
program_start = int(time.time())

subreddits = file_io.read_parameters(param_file)[1]
k, src, dst, ext = file_io.read_parameters(param_file)[5:]
k = k * 1000
language.set_API_key(key_file)

for sub in subreddits:
    sub_data = []
    input_file = src + '/' + sub.lower() + '_' + src + '.json'
    comment_data = file_io.read(input_file)["data"]
    output_file = dst + '/' + sub.lower() + '_' + dst + '.json'
    file_io.set_output_file(output_file)
    c = 0  # number of API calls
    d = 0  # number of successful API calls
    st = 0
    end = len(comment_data)
    if ext:
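    # A minimal sketch of how the counters above are likely consumed;
    # 'language.analyze' is a hypothetical name standing in for the real
    # per-comment API call, which is not shown in this excerpt:
    #
    #   for i in range(st, end):
    #       c += 1                                   # API calls attempted
    #       result = language.analyze(comment_data[i]['body'])
    #       if result is not None:
    #           d += 1                               # successful calls
    #           sub_data.append(result)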