def json_flatten(f):
    """Flatten every project found in a JSON file and pass the rows on.

    The parsed document is iterated as a sequence of objects that each carry
    a ``"projects"`` list. Every entry of those lists is run through
    ``flatten_fields`` and the collected results are handed to ``json2csv``.
    Progress is reported on stdout after every 100 parsed objects.

    Args:
        f: Readable file-like object containing the JSON document.

    Raises:
        SystemExit: With status 1 when the JSON document cannot be loaded.
    """
    # Local import: the file's import block is not visible from this block.
    import sys

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Attemping to flatten all objects...")
    print("Grab a beer...this could take a while")
    print("")
    print("Loading json file...")

    try:
        data = json.load(f)
    except Exception as err:
        print("Error while loading json file... :(")
        # Surface the cause and exit non-zero so callers and shells can
        # detect the failure; a bare exit() reported success (status 0).
        sys.stderr.write("%s\n" % (err,))
        sys.exit(1)

    print("json file loaded successfully!")

    flattened_data = []
    for projects in data:
        for project in projects["projects"]:
            flattened_data.append(flatten_fields(project))
            # The list is never empty at this point, so the modulo test
            # alone fires at 100, 200, 300, ...
            if len(flattened_data) % 100 == 0:
                print("Num objects parsed: " + str(len(flattened_data)))

    print("Final object count: " + str(len(flattened_data)))
    json2csv(flattened_data)
def get_entity(path):
    """Download every page of an entity, cache it as JSON, convert to CSV.

    Pages are requested with ``get_resource``, starting at ``path`` and
    following each response's ``d['__next']`` link until a page has no such
    link or ``get_resource`` returns ``None``. The combined ``d['results']``
    items are written to ``../data/<md5 hex of the initial path>.json``.

    Args:
        path: Resource path of the first page to request.

    Returns:
        Whatever ``json2csv.json2csv`` returns for the written JSON file.
    """
    from hashlib import md5
    import os.path

    # NOTE(review): this echoes the global ``auth`` value to stdout --
    # confirm it cannot contain credentials.
    print("retrieving %s %s" % (path, auth))

    # get entity
    pool_path = '../data/'
    # TODO set uuid = md5(file)
    # hashlib needs bytes. hexdigest() yields the same text as the
    # Python-2-only ``.digest().encode('hex_codec')`` spelling.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')
    fname = os.path.join(pool_path, md5(raw_path).hexdigest() + '.json')

    # pull all pages
    obj = []
    while True:
        r = get_resource(path)
        if r is None:
            break
        data = r['d']
        obj.extend(data['results'])
        if '__next' not in data:
            break
        path = data['__next']

    # Write only after every page was fetched, so a failed request does not
    # leave a truncated cache file, and close the file before handing its
    # name to the converter so the contents are fully flushed.
    with open(fname, 'w') as f:
        f.write(json.dumps(obj, indent=0))

    # to csv
    return json2csv.json2csv(fname)
def get_entity(path):
    """Download every page of an entity, cache it as JSON, convert to CSV.

    Pages are requested with ``get_resource``, starting at ``path`` and
    following each response's ``d['__next']`` link until a page has no such
    link or ``get_resource`` returns ``None``. The combined ``d['results']``
    items are written to ``../data/<md5 hex of the initial path>.json``.

    Args:
        path: Resource path of the first page to request.

    Returns:
        Whatever ``json2csv.json2csv`` returns for the written JSON file.
    """
    from hashlib import md5
    import os.path

    # NOTE(review): this echoes the global ``auth`` value to stdout --
    # confirm it cannot contain credentials.
    print("retrieving %s %s" % (path, auth))

    # get entity
    pool_path = '../data/'
    # TODO set uuid = md5(file)
    # hashlib needs bytes. hexdigest() yields the same text as the
    # Python-2-only ``.digest().encode('hex_codec')`` spelling.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')
    fname = os.path.join(pool_path, md5(raw_path).hexdigest() + '.json')

    # pull all pages
    obj = []
    while True:
        r = get_resource(path)
        if r is None:
            break
        data = r['d']
        obj.extend(data['results'])
        if '__next' not in data:
            break
        path = data['__next']

    # Write only after every page was fetched, so a failed request does not
    # leave a truncated cache file, and close the file before handing its
    # name to the converter so the contents are fully flushed.
    with open(fname, 'w') as f:
        f.write(json.dumps(obj, indent=0))

    # to csv
    return json2csv.json2csv(fname)
'--no_instance', '--label_nc', '0', '--results_dir', outlines_folder, '--mode', opt.mode, '--which_epoch', opt.epoch ]) elif opt.model == 'encoder-decoder-skip': subprocess.call([ 'python', './segmentation/bulk_predict.py', '--input_folder', png_folder, '--output_folder', outlines_folder, '--checkpoint_path', opt.checkpoint_dir, '--crop_height', '1024', '--crop_width', '1024', '--model', 'Encoder-Decoder-Skip' ]) else: print("Unknown model") exit(-1) # Interpret ruling lines and write individual cells to json file if not opt.skip_find_cells: print("Finding cells") rulers.rule_pdffigures(json_folder, outlines_folder) # Extract the text, using the bounding boxes, from the original PDF if not opt.skip_extract_text: print("Extracting text") textboxtract.extract_pdffigures(json_folder, pdf_folder) # Create CSV files from the extracted text and locations of said text if not opt.skip_create_csv: print("Creating csv") json2csv.json2csv(json_folder, results_folder) print("Finished")
type=str, default="request.graphql", help="File containing graphql request body") parser.add_argument( '-j', '--jsonpath', type=str, default="data.deals", help="defines json path to the list to parse. Example: data.deals") parser.add_argument('url', help="URL of the graphql api to request data from") args = parser.parse_args() if not os.path.isfile(args.graphql): print( "Needs a valid request definition file. Default name: request.graphql" ) parser.print_help() sys.exit(1) with open(args.graphql, 'r') as g_file: g_req = g_file.read() if not args.url: print("Need a valid URL to start a request. None given.") parser.print_help() sys.exit(1) json2csv(do_request(args.url, g_req), args.jsonpath, "output.csv")
os.remove(os.path.join(png_folder, image)) continue img = combine.pad(img, (1024, 1024, 3)) scipy.misc.imsave(os.path.join(png_folder, image), img) # Process the tables, add outline URL to respective JSON file if not opt.skip_predict: print("Predicting outlines") if opt.model == 'pix2pix': subprocess.call('sh ./pixpred.sh %s %s %s %s' % ('gen-tables-hd', opt.checkpoint_dir, opt.dataroot, outlines_folder)) else: predict(opt.checkpoint_dir, png_folder, outlines_folder) add_outline_url(json_folder, outlines_folder) # Interpret ruling lines and write individual cells to json file if not opt.skip_find_cells: print("Finding cells") rulers.rule(json_folder) # Extract the text, using the bounding boxes, from the original PDF if not opt.skip_extract_text: print("Extracting text") textboxtract.extract(json_folder, pdf_folder) # Create CSV files from the extracted text and locations of said text if not opt.skip_create_csv: print("Creating csv") json2csv.json2csv(json_folder, csv_folder)
date_type: { "from": start_date, "to": end_date }, #"status": "completed", "nps_grade_value": { "min": None, "max": None } }, "sort": "" }) raw_json_data = json.loads(json.dumps(applications))['data'] #print(raw_json_data) json2csv(raw_json_data, csv_file_path) # OGX Data elif type_req == "OGX": if prog == 'GV': print("Getting OGV Applications...") elif prog == 'GT': print("Getting OGT Applications...") elif prog == 'GE': print("Getting OGE Applications...") elif prog == 'ALL': print("Getting OGX Applications...") people = gis_get_gql_paginated(get_data("gql", get_people).decode("utf-8"), silent=False,