Example #1
import json
import sys

def json_flatten(f):
    # flatten_fields and json2csv are assumed to be defined elsewhere
    # in this module.
    print("Attempting to flatten all objects...")
    print("Grab a beer...this could take a while")
    print()
    print("Loading json file...")

    try:
        data = json.load(f)
    except Exception:
        print("Error while loading json file... :(")
        sys.exit(1)

    flattened_data = []
    print("json file loaded successfully!")

    # Flatten every project and report progress every 100 objects.
    counter = 0
    for projects in data:
        for project in projects["projects"]:
            flattened_data.append(flatten_fields(project))

            counter += 1
            if counter % 100 == 0:
                print("Num objects parsed: " + str(counter))

    print("Final object count: " + str(counter))
    json2csv(flattened_data)
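
A minimal usage sketch for the function above, assuming flatten_fields and json2csv are available in the same module; the file name "projects.json" is a hypothetical example, not part of the original:

with open("projects.json") as f:
    json_flatten(f)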
Example #2
import json
import os.path
from hashlib import md5

def get_entity(path):
    # 'auth' and get_resource are assumed to be defined at module level.
    print("retrieving", path, auth)

    pool_path = '../data/'

    # Cache each entity under the md5 hash of its request path.
    # TODO set uuid = md5(file)
    s = md5(path.encode('utf-8')).hexdigest()
    fname = os.path.join(pool_path, s + '.json')

    # Pull all pages, following OData-style '__next' links.
    obj = []
    with open(fname, 'w+') as f:
        while True:
            r = get_resource(path)
            if r is None:
                break
            data = r['d']
            items = data['results']
            obj.extend(items)
            if '__next' in data:
                path = data['__next']
            else:
                break

        f.write(json.dumps(obj, indent=0))

    # Convert the cached JSON file to CSV.
    return json2csv.json2csv(fname)
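
The snippet above assumes a get_resource helper that fetches one page of an OData-style endpoint, where the JSON payload sits under a 'd' key and 'd.__next' holds the URL of the following page. A minimal sketch of such a helper using the requests library; the auth object and Accept header are assumptions, not part of the original:

import requests

def get_resource(path):
    # Fetch one page; return the decoded JSON, or None on failure so the
    # pagination loop above stops.
    resp = requests.get(path, auth=auth, headers={'Accept': 'application/json'})
    if resp.status_code != 200:
        return None
    return resp.json()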
Example #3
                '--no_instance', '--label_nc', '0', '--results_dir',
                outlines_folder, '--mode', opt.mode, '--which_epoch', opt.epoch
            ])
        elif opt.model == 'encoder-decoder-skip':
            subprocess.call([
                'python', './segmentation/bulk_predict.py', '--input_folder',
                png_folder, '--output_folder', outlines_folder,
                '--checkpoint_path', opt.checkpoint_dir, '--crop_height',
                '1024', '--crop_width', '1024', '--model',
                'Encoder-Decoder-Skip'
            ])
        else:
            print("Unknown model")
            exit(-1)

    # Interpret ruling lines and write individual cells to json file
    if not opt.skip_find_cells:
        print("Finding cells")
        rulers.rule_pdffigures(json_folder, outlines_folder)

    # Extract the text, using the bounding boxes, from the original PDF
    if not opt.skip_extract_text:
        print("Extracting text")
        textboxtract.extract_pdffigures(json_folder, pdf_folder)

    # Create CSV files from the extracted text and locations of said text
    if not opt.skip_create_csv:
        print("Creating csv")
        json2csv.json2csv(json_folder, results_folder)

    print("Finished")
Example #4
                        type=str,
                        default="request.graphql",
                        help="File containing graphql request body")
    parser.add_argument(
        '-j',
        '--jsonpath',
        type=str,
        default="data.deals",
        help="defines json path to the list to parse. Example: data.deals")
    parser.add_argument('url',
                        help="URL of the graphql api to request data from")

    args = parser.parse_args()

    if not os.path.isfile(args.graphql):
        print(
            "Needs a valid request definition file. Default name: request.graphql"
        )
        parser.print_help()
        sys.exit(1)

    with open(args.graphql, 'r') as g_file:
        g_req = g_file.read()

    if not args.url:
        print("Need a valid URL to start a request. None given.")
        parser.print_help()
        sys.exit(1)

    json2csv(do_request(args.url, g_req), args.jsonpath, "output.csv")
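
A minimal sketch of the do_request helper assumed above, posting the GraphQL request body with the requests library; the {"query": ...} payload shape is an assumption, not confirmed by the original:

import requests

def do_request(url, graphql_body):
    # POST the raw GraphQL document and return the decoded JSON response.
    resp = requests.post(url, json={"query": graphql_body})
    resp.raise_for_status()
    return resp.json()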
Example #5
                os.remove(os.path.join(png_folder, image))
                continue
            img = combine.pad(img, (1024, 1024, 3))
            scipy.misc.imsave(os.path.join(png_folder, image), img)

    # Process the tables, add outline URL to respective JSON file
    if not opt.skip_predict:
        print("Predicting outlines")
        if opt.model == 'pix2pix':
            subprocess.call('sh ./pixpred.sh %s %s %s %s' %
                            ('gen-tables-hd', opt.checkpoint_dir, opt.dataroot,
                             outlines_folder),
                            shell=True)
        else:
            predict(opt.checkpoint_dir, png_folder, outlines_folder)
    add_outline_url(json_folder, outlines_folder)

    # Interpret ruling lines and write individual cells to json file
    if not opt.skip_find_cells:
        print("Finding cells")
        rulers.rule(json_folder)

    # Extract the text, using the bounding boxes, from the original PDF
    if not opt.skip_extract_text:
        print("Extracting text")
        textboxtract.extract(json_folder, pdf_folder)

    # Create CSV files from the extracted text and locations of said text
    if not opt.skip_create_csv:
        print("Creating csv")
        json2csv.json2csv(json_folder, csv_folder)
Example #6
                    date_type: {
                        "from": start_date,
                        "to": end_date
                    },
                    #"status": "completed",
                    "nps_grade_value": {
                        "min": None,
                        "max": None
                    }
                },
                "sort": ""
            })

        # Round-trip through json to normalize the response object into
        # plain dicts/lists, then take its 'data' payload.
        raw_json_data = json.loads(json.dumps(applications))['data']
        #print(raw_json_data)
        json2csv(raw_json_data, csv_file_path)

    # OGX Data
    elif type_req == "OGX":
        if prog == 'GV':
            print("Getting OGV Applications...")
        elif prog == 'GT':
            print("Getting OGT Applications...")
        elif prog == 'GE':
            print("Getting OGE Applications...")
        elif prog == 'ALL':
            print("Getting OGX Applications...")

        people = gis_get_gql_paginated(get_data("gql",
                                                get_people).decode("utf-8"),
                                       silent=False,