예제 #1
0
파일: test_pyjq.py 프로젝트: yoav-orca/pyjq
def test_one():
    """Check that ``pyjq.one`` returns a lone result and rejects any other count."""
    bindings = dict(foo="bar")
    combined = pyjq.one(". + $foo", "val", vars=bindings)
    assert combined == "valbar"

    # More than one output element must raise IndexError.
    with pytest.raises(IndexError):
        pyjq.one(".[]", [1, 2])

    # Zero output elements must raise IndexError as well.
    with pytest.raises(IndexError):
        pyjq.one(".[]", [])
def get_dataframe_zoe(url):
    """Scrape one listing page and return its data as a DataFrame.

    Fetches ``url``, extracts the ``xtMultC`` JSON blob from the page text,
    reads version / mileage / year / price from it with jq, reads the phone
    number from the parsed HTML, and compares the price with the value
    returned by ``get_rating_argus``.

    Args:
        url: Address of the listing page. It is also the lookup key into the
            module-level ``df_links`` frame (matched against its ``Link``
            column to find the region).

    Returns:
        A DataFrame with the columns declared below. It holds one appended
        row on success and stays empty if ``UnboundLocalError`` is raised
        while scraping; any other exception propagates.
    """
    global df_links

    # Empty frame that fixes the column set of the result.
    df = pd.DataFrame({
        'Region': [],
        'Version': [],
        'Year': [],
        'Mileage': [],
        'Owner\'s phone': [],
        'Price': [],
        'Argus rating': [],
        'Benefit Buyer': []
    })

    try:
        page = requests.get(url, headers=request_headers)
        soup = _handle_request_result_and_build_soup(page)
        reg_json = re.compile(r"xtMultC:(.*),")
        reg_num = re.compile(r"(\d+)")
        reg_phone = re.compile(r"[0-9 ]{2,}")

        # str() of the findall() list is sliced to drop the list repr's
        # leading "['" plus one more character and the trailing "']".
        # NOTE(review): presumably the extra character is a space after
        # "xtMultC:" and exactly one match is expected — confirm.
        json_tab = json.loads(str(reg_json.findall(page.text))[3:-2])

        # Each field is stringified and its first and last characters are
        # dropped; numeric fields are then reduced to their digit run, with
        # [2:-2] again stripping the "['" / "']" of a single-match list repr.
        version = str(pyjq.one(".\"16\"", json_tab))[1:-1]
        mileage = str(pyjq.one(".\"33\"", json_tab))[1:-1]
        mileage = str(reg_num.findall(mileage))[2:-2]
        year = str(pyjq.one(".\"36\"", json_tab))[1:-1]
        year = str(reg_num.findall(year))[2:-2]
        price = str(pyjq.one(".\"32\"", json_tab))[1:-1]
        price = int(str(reg_num.findall(price))[2:-2])
        phone = soup.find("div", class_="phoneNumber1").text
        # Keep only runs of two or more digits/spaces, then trim spaces.
        phone = ''.join(reg_phone.findall(phone)).strip('  ')

        rating_argus = int(get_rating_argus(url, reg_num))

        # A non-positive rating is treated as "no rating": no benefit computed.
        if rating_argus > 0:
            diff_price = rating_argus - price
        else:
            diff_price = 0

        # NOTE(review): DataFrame.append is deprecated/removed in recent
        # pandas releases — confirm the pandas version this runs against.
        df = df.append(
            {
                'Region': df_links.set_index("Link")['region'].loc[(url)],
                'Version': version,
                'Year': year,
                'Mileage': mileage,
                'Owner\'s phone': phone,
                'Price': price,
                'Argus rating': rating_argus,
                'Benefit Buyer': diff_price
            },
            ignore_index=True)
    except UnboundLocalError:
        # Only this exception type is swallowed; the empty frame is returned.
        pass
    return df
예제 #3
0
    def test_one(self):
        """``pyjq.one`` yields exactly one value, otherwise raises IndexError."""
        result = pyjq.one('. + $foo', 'val', vars=dict(foo='bar'))
        self.assertEqual(result, 'valbar')

        # several results -> IndexError
        with self.assertRaises(IndexError):
            pyjq.one('.[]', [1, 2])

        # no results -> IndexError
        with self.assertRaises(IndexError):
            pyjq.one('.[]', [])
예제 #4
0
def on_message(client, userdata, msg):
  """Republish values derived from one incoming message to PUB_TOPIC.

  The payload is decoded as UTF-8 JSON. Four messages are then published, in
  this order: the LIGHT_JQ_FILTER result, the windDomoticz() output, the
  UV_JQ_FILTER result and the UV_test_JQ_FILTER result.
  """
  received_json = json.loads(msg.payload.decode('utf-8'))

  def _publish_filtered(jq_filter):
    # Apply one jq filter to the payload and publish the result as JSON.
    published_json = pyjq.one(jq_filter, received_json)
    client.publish(PUB_TOPIC, json.dumps(published_json))

  _publish_filtered(LIGHT_JQ_FILTER)
  client.publish(PUB_TOPIC, windDomoticz(received_json))
  _publish_filtered(UV_JQ_FILTER)
  _publish_filtered(UV_test_JQ_FILTER)
예제 #5
0
파일: test_pyjq.py 프로젝트: ltd/pyjq
    def test_one(self):
        """A single jq result is returned; any other result count is an IndexError."""
        jq_vars = dict(foo='bar')
        self.assertEqual(pyjq.one('. + $foo', 'val', vars=jq_vars), 'valbar')

        # IndexError expected when the filter emits multiple elements
        with self.assertRaises(IndexError):
            pyjq.one('.[]', [1, 2])

        # IndexError expected when the filter emits nothing
        with self.assertRaises(IndexError):
            pyjq.one('.[]', [])
예제 #6
0
def get_role_iam(rolename, account_iam):
    """Given the IAM of an account, and a role name, return the IAM data for the role.

    Args:
        rolename: Value compared against ``RoleName`` of every entry in
            ``.RoleDetailList``.
        account_iam: Parsed account IAM document to search.

    Returns:
        The single matching role entry.

    Raises:
        Exception: If ``pyjq.one`` raises IndexError, i.e. the filter did not
            produce exactly one role.
    """
    try:
        # Bind the name as a jq variable instead of formatting it into the
        # filter text, so quotes or backslashes in it cannot change the filter.
        return pyjq.one(
            '.RoleDetailList[] | select(.RoleName == $rolename)',
            account_iam,
            vars=dict(rolename=rolename),
        )
    except IndexError as err:
        raise Exception("Unknown role named {}".format(rolename)) from err
예제 #7
0
def get_user_iam(username, account_iam):
    """Given the IAM of an account, and a username, return the IAM data for the user.

    Args:
        username: Value compared against ``UserName`` of every entry in
            ``.UserDetailList``.
        account_iam: Parsed account IAM document to search.

    Returns:
        The single matching user entry.

    Raises:
        SystemExit: With an error message if ``pyjq.one`` raises IndexError,
            i.e. the filter did not produce exactly one user.
    """
    try:
        # Bind the name as a jq variable instead of formatting it into the
        # filter text, so quotes or backslashes in it cannot change the filter.
        return pyjq.one(
            '.UserDetailList[] | select(.UserName == $username)',
            account_iam,
            vars=dict(username=username),
        )
    except IndexError:
        # Raise SystemExit directly: the exit() builtin is injected by the
        # site module and is not guaranteed to exist in every runtime.
        raise SystemExit("ERROR: Unknown user named {}".format(username))
예제 #8
0
def drain(config, file_name, keep=False):
    """
    Write every message yielded by receive_and_delete_messages() for the
    configured queue to ``file_name`` as one JSON document per line.

    ``config`` must provide "queue_url" and "region"; an optional, truthy
    "printer" entry is used as a jq filter whose result is printed for each
    message. ``keep`` is forwarded unchanged to receive_and_delete_messages().
    If ``file_name`` already exists as a file, a message is written to stderr
    and the process exits with status 1. The number of messages handled is
    printed at the end.
    """
    queue_url = config["queue_url"]
    printer = config.get("printer", None) or None

    already_there = os.path.isfile(file_name) and os.path.exists(file_name)
    if already_there:
        print(f"{file_name} already exists", file=sys.stderr)
        exit(1)

    sqs_client = boto3.client("sqs", region_name=config["region"])

    count = 0
    with open(file_name, "wb", buffering=0) as sink:
        messages = receive_and_delete_messages(sqs_client, queue_url, keep)
        for count, message in enumerate(messages, start=1):
            line = json.dumps(message) + "\n"
            sink.write(line.encode("utf-8", "ignore"))
            if printer:
                print(pyjq.one(printer, message))

    print(f"Drained {count} messages.")
0
 def create_node_extra(issue_key, fields):
     """Evaluate the ``extra_jq`` filter on ``fields`` with ``$issue_key`` bound.

     On any failure the issue key is logged, ``fields`` is printed, and the
     original exception is re-raised.
     """
     import pyjq
     jq_vars = dict(issue_key=issue_key)
     try:
         extra = pyjq.one(extra_jq, fields, vars=jq_vars)
     except Exception:
         log('Problem with extra for issue %s' % issue_key)
         print(fields)
         raise
     return extra
예제 #10
0
def get_user_allowed_actions(aws_api_list, user_iam, account_iam):
    """Return the privileges granted to a user by IAM.

    Statements are collected, in order, from: each group's managed policies,
    each group's inline policies, the user's managed policies, and finally the
    user's inline policies. The result is ``Privileges.determine_allowed()``.
    """
    # jq filter selecting the default version's document of a managed policy.
    default_document_filter = (
        '.Policies[] | select(.Arn == "{}") | '
        '.PolicyVersionList[] | select(.IsDefaultVersion == true) | .Document'
    )

    group_names = user_iam['GroupList']
    user_attachments = user_iam['AttachedManagedPolicies']

    privileges = Privileges(aws_api_list)

    def _add_document(document):
        # Register every statement of one policy document.
        for statement in make_list(document['Statement']):
            privileges.add_stmt(statement)

    def _add_managed(attachment):
        # Resolve an attached managed policy by ARN and register its statements.
        jq_filter = default_document_filter.format(attachment['PolicyArn'])
        _add_document(pyjq.one(jq_filter, account_iam))

    # Permissions inherited from groups
    for group_name in group_names:
        group_iam = pyjq.one(
            '.GroupDetailList[] | select(.GroupName == "{}")'.format(group_name),
            account_iam)
        for attachment in group_iam['AttachedManagedPolicies']:
            _add_managed(attachment)
        for inline_policy in group_iam['GroupPolicyList']:
            _add_document(inline_policy['PolicyDocument'])

    # Managed policies attached directly to the user
    for attachment in user_attachments:
        _add_managed(attachment)

    # Inline policies attached directly to the user
    inline_statements = pyjq.all(
        '.UserPolicyList[].PolicyDocument.Statement[]', user_iam)
    for statement in inline_statements:
        privileges.add_stmt(statement)

    return privileges.determine_allowed()
예제 #11
0
 def load(self, json_file):
     """Load a JSON file object and turn it into processed rows on ``self.rows``.

     Steps, in order: parse the JSON, narrow it with ``_target_data``, record
     the file name under ``context_constants["aux"]["_file_"]``, optionally run
     the jq pre-processing script, call ``process_each``, optionally run the jq
     post-processing script on ``self.rows``, refresh the header keys, and
     replace special values (null / empty / true / false) in the rows.

     Args:
         json_file: Open file object accepted by ``json.load``; its ``name``
             attribute is exposed to the jq scripts.

     Raises:
         AssertionError: If ``context_constants`` is set but has no ``"aux"`` key.
     """
     data = json.load(json_file)
     
     ## If we wanted to allow the user to use JQ to select the keys to use
     ## we would change the order of both these lines
     ## (... self._target_data(...) and data = jqp.one(...) ...)
     ## 
     ## Or another behaviour you may want to allow by swapping their order
     ## is allowing the user to use keys and data outside the self.collection
     ## attribute as part of the preprocessing. It offers more possibilities
     data = self._target_data(data)
     
     # Make the source file name available to jq scripts via the "aux" constants.
     if not self.context_constants:
         self.context_constants = {"aux":{"_file_": json_file.name}}
     else:
         assert "aux" in self.context_constants, "Missing the root key 'aux' in 'context-constants' of the outline file"
         self.context_constants["aux"]["_file_"] = json_file.name    
     
     # performance: avoid calling jq if identity
     # (jq is also skipped when the ``jqp`` module object is falsy)
     data = jqp.one(self.preprocessing, data, vars=self.context_constants) if jqp and self.preprocessing else data
     
     ## Mapping and processing
     self.process_each(data)
     
     # performance: avoid calling jq if identity
     if jqp and self.postprocessing:
         self.rows = jqp.one(self.postprocessing, self.rows, vars=self.context_constants)
     
     self._update_header_keys(self.rows)
     # special values: replacement strings, with defaults when not configured
     vnone = self.special_values_mapping.get("null", "")
     vempty = self.special_values_mapping.get("empty", "")
     vtrue = self.special_values_mapping.get("true", "true")
     vfalse = self.special_values_mapping.get("false", "false")
     
     # a tad faster than the 2 calls equivalent
     # however, replace it if needed for maintenance
     self.rows = self._replace_nulls(self.rows, vnone, vempty)
     # self.rows = self._replace_value(self.rows, None, vnone, by_identity=True)
     # self.rows = self._replace_value(self.rows, "", vempty, by_identity=False)
     self.rows = self._replace_value(self.rows, True, vtrue, by_identity=True)
     self.rows = self._replace_value(self.rows, False, vfalse, by_identity=True)
 def service_calendars(self):
     """Build one ``_ServiceCalendar`` per entry of ``self._service_endpoints``.

     For each endpoint the date strings found under ``.calendar."<endpoint id>"``
     in ``self._response`` are parsed with ``self._DATE_FORMAT`` into dates.
     """
     calendars = []
     for endpoint in self._service_endpoints:
         description = endpoint.description
         raw_dates = pyjq.one('.calendar."{}"'.format(endpoint.id), self._response)
         parsed_dates = []
         for raw in raw_dates:
             parsed_dates.append(datetime.strptime(raw, self._DATE_FORMAT).date())
         calendars.append(_ServiceCalendar(description, parsed_dates))
     return calendars
예제 #13
0
def get_role_allowed_actions(aws_api_list, role_iam, account_iam):
    """Return the privileges granted to a role by IAM.

    Statements come first from the default version of every attached managed
    policy, then from the role's own ``RolePolicyList`` documents.
    """
    default_document_filter = (
        '.Policies[] | select(.Arn == "{}") | .PolicyVersionList[] | select(.IsDefaultVersion == true) | .Document'
    )
    privileges = Privileges(aws_api_list)

    # Managed policies: look up each document in the account IAM by ARN.
    for attachment in role_iam['AttachedManagedPolicies']:
        jq_filter = default_document_filter.format(attachment['PolicyArn'])
        document = pyjq.one(jq_filter, account_iam)
        for statement in make_list(document['Statement']):
            privileges.add_stmt(statement)

    # Policies embedded in the role itself.
    for embedded in role_iam['RolePolicyList']:
        for statement in make_list(embedded['PolicyDocument']['Statement']):
            privileges.add_stmt(statement)

    return privileges.determine_allowed()
예제 #14
0
    def jadual_negeri(negeri):
        """Print a table of the zones of every state returned by the API.

        NOTE: the ``negeri`` parameter is not read; the state filter comes
        from ``args.negeri`` in the enclosing scope.
        """
        if args.negeri:
            fetch_state = api.get_negeri(args.negeri)
        else:
            fetch_state = api.get_negeri()

        states = pyjq.one(".states", fetch_state)
        myzone = []

        sp.start()
        for state in states:
            zone_response = api.get_negeri(str(state))
            myzone.append(pyjq.all(".results[]", zone_response))
            # Report progress for the state that was just fetched.
            sp.hide()
            sp.write(state + "✅")
        sp.ok()

        # Flatten the per-state result lists into one list of zones.
        zon_formatted = pyjq.all(".[][]", myzone)
        table_rows = data_for_jadual(zon_formatted, fields)
        print(tabulate(table_rows, fields, tablefmt="fancy_grid"))
예제 #15
0
 def create_node_label(issue_key, fields):
     """Return the label text for an issue node.

     Without a ``jq`` filter the label is ``"<issue_key> (<summary>)"`` with
     the summary truncated and its double quotes backslash-escaped. With a
     filter, the label is the stringified result of running it on ``fields``
     with ``$issue_key`` bound; failures are logged, ``fields`` is printed and
     the exception is re-raised.
     """
     summary = fields['summary']
     # Truncate with "..." only when the dots replace more than two
     # characters; otherwise the result would not be shorter than the input.
     too_long = len(summary) > MAX_SUMMARY_LENGTH + 2
     if too_long:
         summary = summary[:MAX_SUMMARY_LENGTH] + '...'
     short_summary = summary.replace('"', '\\"')

     if not jq:
         return '{} ({})'.format(issue_key, short_summary)

     import pyjq
     jq_vars = dict(issue_key=issue_key)
     try:
         return str(pyjq.one(jq, fields, vars=jq_vars))
     except Exception:
         log('Error with issue %s' % issue_key)
         print(fields)
         raise
예제 #16
0
def jadual_lokasi(args):
    """Show the prayer-time table for a location, or send notifications.

    The zone is resolved from ``args.lokasi`` (title-cased) with ``get_zon``.
    When ``args.minggu`` is truthy the week's data is fetched with
    ``api.get_week``, otherwise today's data with ``api.get_today``; in both
    cases the result is a list of row dicts keyed by "tarikh" and the prayer
    names.

    Columns are "tarikh" plus ``args.fields`` when given, otherwise all six
    default columns. When ``args.notify`` is truthy, ``notify`` is called with
    the first row's value for each requested name; otherwise the table is
    printed.
    """
    lok = get_zon(args.lokasi.title())

    if args.minggu:
        data = pyjq.all(
            ".prayer_times[]|{tarikh:.date,subuh:.subuh,zohor:.zohor,asar:.asar,maghrib:.maghrib,isyak:.isyak}",
            api.get_week(lok))
    else:
        data = pyjq.one(
            ".|[{tarikh:.prayer_times.date,subuh:.prayer_times.subuh,zohor:.prayer_times.zohor,asar:.prayer_times.asar, maghrib:.prayer_times.maghrib,isyak:.prayer_times.isyak}]",
            api.get_today(lok))

    # The previous `["tarikh"] + args.fields or [...]` could never reach its
    # default ("+" binds tighter than "or"), and `fields` was left unbound
    # whenever args.fields was empty. Choose explicitly instead.
    if args.fields:
        fields = ["tarikh"] + args.fields
    else:
        fields = ["tarikh", "subuh", "zohor", "asar", "maghrib", "isyak"]

    data_format = data_for_jadual(data, fields)
    if args.notify:
        for waktu in args.notify:
            notify(data[0][waktu])
    else:
        print(tabulate(data_format, fields, tablefmt="fancy_grid"))
def main():
    """Download the console log and archived artifacts of every run of a build.

    Command-line arguments: ``host``, ``port``, ``job``, an optional
    ``build_number`` (default ``lastSuccessfulBuild``), ``-o/--output-dir``
    (default ``artifacts``) and ``-c/--console-text``.

    For each run URL listed under ``.runs[].url`` of the build's JSON API, a
    sub-directory named after the parent job's display name is created in the
    output directory. The run's ``consoleText`` is saved there and every
    ``archive/``-prefixed member of its ``archive.zip`` is extracted into it.
    """
    parser = ArgumentParser()
    parser.add_argument('host')
    parser.add_argument('port')
    parser.add_argument('job')
    parser.add_argument('build_number', nargs='?', default='lastSuccessfulBuild')
    parser.add_argument('-o', '--output-dir', action='store', dest='output_dir', default='artifacts')
    # NOTE(review): this flag is parsed but never read below — confirm intent.
    parser.add_argument('--console-text', '-c', action='store_true', dest='console_text')

    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    create_empty_dir(output_dir)

    def api_url(url):
        # JSON API endpoint that belongs to a page URL.
        return urljoin(url, 'api/json')

    build_api_url = api_url('http://{0.host}:{0.port}/job/{0.job}/{0.build_number}/'.format(args))
    # Only members below this zip prefix are extracted; the prefix is stripped.
    prefix = 'archive/'
    for run_url in jq.all('.runs[].url', url=build_api_url):
        subjob_url = urljoin(run_url, '../')
        subjob_name = jq.one('.displayName', url=api_url(subjob_url))
        # Quote every unsafe character (including "/") so the name is one path part.
        subjob_dir = output_dir / urllib.request.quote(subjob_name, '')

        if not subjob_dir.is_dir():
            subjob_dir.mkdir(parents=True)

        with (subjob_dir / 'consoleText').open('wb') as local_fp, \
             urlopen(urljoin(run_url, 'consoleText')) as http_fp:
            shutil.copyfileobj(http_fp, local_fp)

        # Buffer the archive in memory and close the HTTP response right away
        # (it was previously left open).
        with urlopen(urljoin(run_url, 'artifact/*zip*/archive.zip')) as zip_resp:
            zip_fp = io.BytesIO(zip_resp.read())

        with ZipFile(zip_fp) as z:
            for name in z.namelist():
                if not name.startswith(prefix):
                    continue

                path = subjob_dir / name[len(prefix):]
                if not path.parent.is_dir():
                    path.parent.mkdir(parents=True)
                with path.open('wb') as fp:
                    fp.write(z.read(name))
예제 #18
0
def info_zon(args, fields=["zone", "negeri", "lokasi"]):
    """Print zone information as a table.

    Without ``args.zonkod`` a table of the zones of every state is printed
    (columns given by ``fields``). With ``args.zonkod`` today's data for that
    zone is fetched and printed as key/value rows.
    """
    def jadual_negeri(negeri):
        # Table of all zones per state. The ``negeri`` argument is not read;
        # the state filter comes from ``args.negeri``.
        if args.negeri:
            fetch_state = api.get_negeri(args.negeri)
        else:
            fetch_state = api.get_negeri()

        states = pyjq.one(".states", fetch_state)
        myzone = []

        sp.start()
        for state in states:
            zone_response = api.get_negeri(str(state))
            myzone.append(pyjq.all(".results[]", zone_response))
            sp.hide()
            sp.write(state + "✅")
        sp.ok()

        # Flatten the per-state result lists into one list of zones.
        zon_formatted = pyjq.all(".[][]", myzone)
        table_rows = data_for_jadual(zon_formatted, fields)
        print(tabulate(table_rows, fields, tablefmt="fancy_grid"))

    if args.zonkod is None:
        jadual_negeri(args.negeri)
        return

    sp.start()
    fetch_zon = api.get_today(args.zonkod)
    zone_filter = """
        .|{zone,tarikh:.prayer_times.date,locations,azan:{
         subuh: .prayer_times.subuh,
         zohor: .prayer_times.zohor,
         asar:  .prayer_times.asar,
         maghrib:.prayer_times.maghrib,
         isyak: .prayer_times.isyak}}
        """
    data = pyjq.one(zone_filter, fetch_zon)
    row_keys = data.keys()
    formatted = [format_value(data[key]) for key in row_keys]
    items = list(zip(row_keys, formatted))
    sp.ok()
    print(tabulate(items, tablefmt="fancy_grid"))
예제 #19
0
def _get_data_zoe_page_data(soup, region):
    """Extract one listing's fields from a parsed page into a dict.

    The JSON payload is read from the third-to-last ``<script>`` tag after
    skipping its first 20 characters. The returned dict has the keys Area,
    Year, Mileage, Price, Phone, Owner_type, Version and Argus. Version is the
    upper-cased trim-level word found in the ad subject ("" when a TypeError
    occurs, e.g. no match). Argus is None for year "2018", otherwise the
    value returned by ``_prix_argus_zoe``.
    """
    pattern = re.compile(
        "(?i)(\\blife\\b)|(\\bintens\\b)|(\\bedition\\b)|(\\bzen\\b)")
    script_tags = soup.find_all('script')
    json_data = json.loads(script_tags[-3].text[20:])

    def _attribute(key_filter):
        # Evaluate one jq filter against the page payload.
        return pyjq.one(key_filter, json_data)

    row_dict = {"Area": region}
    row_dict["Year"] = _attribute(
        ".adview.attributes[] | select(.key == \"regdate\").value")
    row_dict["Mileage"] = _attribute(
        ".adview.attributes[] | select(.key == \"mileage\").value")
    row_dict["Price"] = _attribute(".adview.price[]")
    row_dict["Phone"] = _attribute(".stores.byId[].phone_number")
    row_dict["Owner_type"] = _attribute(".adview.owner.type")

    try:
        # Subscripting a failed search (None) raises TypeError -> empty version.
        version = pattern.search(_attribute(".adview.subject"))[0]
        row_dict["Version"] = version.upper()
    except TypeError:
        row_dict["Version"] = ""

    if row_dict["Year"] == "2018":
        row_dict["Argus"] = None
    else:
        row_dict["Argus"] = _prix_argus_zoe(postal=regions_argus[region],
                                            year=row_dict["Year"],
                                            mileage=row_dict["Mileage"])

    return row_dict
    def process_solr_cursor_mark(self, json_data):
        """Return the ``nextCursorMark`` value found in ``json_data``."""
        wrapped = pyjq.one('{"cursorMark": .nextCursorMark}', json_data)
        cursor_mark = wrapped['cursorMark']
        return cursor_mark
 def account_number(self):
     """Return the single value selected by ``_ACCOUNT_NUMBER_JQ`` from the JSON response."""
     jq_filter = self._ACCOUNT_NUMBER_JQ
     return pyjq.one(jq_filter, self._json_response)
예제 #22
0
 def test_assigning_values(self):
     """Values passed through ``vars`` are readable as ``$foo`` in the filter."""
     for value in ('bar', ['bar']):
         self.assertEqual(pyjq.one('$foo', {}, vars=dict(foo=value)), value)
 def _token_type(self):
     """Return the single value selected by ``_TOKEN_TYPE_JQ`` from the JSON response."""
     jq_filter = self._TOKEN_TYPE_JQ
     return pyjq.one(jq_filter, self._json_response)
 def company_cd(self):
     """Return the single value selected by ``_COMPANY_CD_JQ`` from the JSON response."""
     jq_filter = self._COMPANY_CD_JQ
     return pyjq.one(jq_filter, self._json_response)
 def person_id(self):
     """Return ``.accountSummaryType.personId`` from the JSON response."""
     jq_filter = '.accountSummaryType.personId'
     return pyjq.one(jq_filter, self._json_response)
 def _access_token(self):
     """Return the single value selected by ``_ACCESS_TOKEN_JQ`` from the JSON response."""
     jq_filter = self._ACCESS_TOKEN_JQ
     return pyjq.one(jq_filter, self._json_response)
 def prem_code(self):
     """Return the single value selected by ``_PREM_CODE_JQ`` from the JSON response."""
     jq_filter = self._PREM_CODE_JQ
     return pyjq.one(jq_filter, self._json_response)
예제 #28
0
파일: test_pyjq.py 프로젝트: yoav-orca/pyjq
def test_assigning_values():
    """Values passed through ``vars`` are readable as ``$foo`` in the filter."""
    for value in ("bar", ["bar"]):
        assert pyjq.one("$foo", {}, vars=dict(foo=value)) == value
예제 #29
0
파일: test_pyjq.py 프로젝트: ltd/pyjq
 def test_assigning_values(self):
     """A string and a list bound via ``vars`` both come back unchanged from ``$foo``."""
     cases = ['bar', ['bar']]
     for expected in cases:
         actual = pyjq.one('$foo', {}, vars=dict(foo=expected))
         self.assertEqual(actual, expected)
예제 #30
0
def feed_create(feed_name,
                feed_config=None,
                basedir=None,
                confirm=False,
                max_audit=5):
    '''
    Using the Configuration Specified, Query the RSS Feed and Create Audits for Missing
    Entries.

    Args:
        feed_name: Name of the feed; used to look up ``_known_feeds`` when no
            ``feed_config`` is given and as the default sub-directory name.
        feed_config: Mapping describing the feed ("url", "format",
            "audit_source_obj" and the optional keys read below).
        basedir: Existing directory under which the feed's sub-directory is
            created. It is passed to ``os.path.join``, so it must be a path.
        confirm: When False nothing is written; each missing audit is only
            reported as "Confirm not Set".
        max_audit: Stop after this many entries; ``None`` or ``-1`` disables
            the limit.

    Returns:
        Dict mapping each source key to its outcome: either a
        ``[status, reason]`` list or whatever ``write_audit`` returned.

    Raises:
        FileNotFoundError: If ``basedir`` is not an existing directory.
        Exception: Whatever ``os.mkdir`` raises for the sub-directory.
    '''

    logger = logging.getLogger("rss_creator.py:feed_create")

    audit_source_items = dict()

    if feed_config is None:
        logger.debug(
            "Feed Config Not Given, Choosing {} from Global Config.".format(
                feed_name))
        feed_config = _known_feeds[feed_name]

    this_path = os.path.join(basedir, feed_config.get("subdir", feed_name))

    if os.path.isdir(basedir) is False:
        # Base directory is missing. The message placeholder was previously
        # never filled in; include the offending path.
        logger.error("Base Path of {} Doesn't Exist.".format(basedir))

        raise FileNotFoundError("Base Path Missing")

    if os.path.isdir(this_path) is False:

        logger.warning("Subdirectory doesn't exist attempting to Create")

        try:
            os.mkdir(this_path)
        except Exception as subdir_error:
            logger.error(
                "Error when creating subdirectory : {}".format(subdir_error))

            raise

    # I have a valid place to Put my Stuff. Let's Grab my URL
    reqtype = feed_config.get("reqtype", "rss")
    try:
        if reqtype == "rss":
            feed_obj = feedparser.parse(feed_config["url"])
        elif reqtype == "json":
            feed_req = requests.get(feed_config["url"])
            feed_obj = {"entries": feed_req.json()}
        else:
            # Previously an unknown reqtype left feed_obj unbound and crashed
            # below; route it through the same "empty feed" handling instead.
            raise ValueError("Unsupported reqtype {!r}".format(reqtype))
    except Exception as feed_read_error:
        logger.error("Unable to Read RSS Feed Returning Empty")
        logger.debug("Feed Read Error : {}".format(feed_read_error))
        feed_obj = {"entries": list()}

    if len(feed_obj["entries"]) == 0:
        logger.warning("No Entries in Given URL.")
    else:
        # Have Entries Let's give this a whirl
        current_num = 0

        if feed_config.get("presort", False) is False:
            cycle_object = feed_obj["entries"]
        else:
            # API is Unsorted, let's Sort it (entries must be a mapping here)
            reverse = bool(feed_config["presort"] == "reverse")

            ordered_keys = list(feed_obj["entries"].keys())
            ordered_keys.sort(reverse=reverse)

            logger.debug(ordered_keys)

            cycle_object = {k: feed_obj["entries"][k] for k in ordered_keys}

        for entry in cycle_object:
            logger.debug("Entry : {}".format(entry))
            current_num = current_num + 1

            # The source key is derived in up to three optional steps:
            # jq extraction, regex capture (group 1), regex replacement.
            best_source_key = None

            if "jq_obj_source_key" in feed_config:
                # I have JQ to Try
                jq_result = pyjq.one(feed_config["jq_obj_source_key"], entry)

                if jq_result is not None:
                    best_source_key = jq_result

            logger.debug(
                "Best Source key After JQ : {}".format(best_source_key))

            if "regex_obj_source_key" in feed_config:

                regex_result = re.search(feed_config["regex_obj_source_key"],
                                         str(best_source_key), re.I)

                if regex_result is not None:
                    best_source_key = regex_result.group(1)

            logger.debug(
                "Best Source key After Regex : {}".format(best_source_key))

            if "regex_obj_replace" in feed_config:
                # "regex_obj_replace" supplies the leading re.sub arguments
                # (pattern, replacement); the key is the string operated on.
                best_source_key = re.sub(*feed_config["regex_obj_replace"],
                                         str(best_source_key))

            logger.debug(
                "Best Source key After Replace : {}".format(best_source_key))

            if best_source_key is not None and len(best_source_key) > 0:

                as_kwargs = {
                    "source_key":
                    best_source_key,
                    "audit_filename":
                    "{}.{}".format(best_source_key, feed_config["format"]),
                    "audit_path":
                    this_path,
                    **feed_config.get("audit_source_kwargs", dict())
                }

                as_args = [*feed_config.get("audit_source_args", list())]

                try:

                    as_obj = feed_config["audit_source_obj"](*as_args,
                                                             **as_kwargs)
                except Exception as audit_source_error:
                    logger.error(
                        "Unable to Pull Audit {}.".format(best_source_key))
                    logger.debug("Pull Error : {}".format(audit_source_error))
                    audit_source_items[best_source_key] = [
                        False, "Error on Creation."
                    ]
                else:
                    if as_obj.validate_audit_live() is True:

                        # See if File Exists
                        if as_obj.audit_file_exists() is False:
                            # Add to Object
                            if confirm is False:
                                logger.info(
                                    "Audit {} File Not Written to {} Confirm not Set."
                                    .format(best_source_key,
                                            as_obj.audit_filename))
                                # NOTE(review): this status is the string
                                # "False" while the other branches use the
                                # boolean False — confirm which callers expect.
                                audit_source_items[best_source_key] = [
                                    "False", "Confirm not Set"
                                ]
                            else:
                                logger.info("Audit {} Writing to {}.".format(
                                    best_source_key, as_obj.audit_filename))

                                audit_source_items[
                                    best_source_key] = as_obj.write_audit(
                                        file_format=feed_config["format"])
                        else:
                            logger.info(
                                "Audit File {} Has existing File.".format(
                                    best_source_key))
                            audit_source_items[best_source_key] = [
                                False, "Pre-Existing File."
                            ]
                    else:
                        logger.warning(
                            "Audit Finding for Source {} Not Valid.".format(
                                best_source_key))
                        audit_source_items[best_source_key] = [
                            False, "Invalid Audit on Creation"
                        ]

            else:
                logger.warning("No Source Key found for Entry : {}".format(
                    entry["id"]))

            # Entries are counted whether or not an audit was produced.
            if max_audit is not None and max_audit != -1 and current_num > (
                    max_audit - 1):
                logger.info("Reached Maximum of {} Audits Processed.".format(
                    current_num))
                break

    return audit_source_items
    def process_solr_item_count(self, json_data):
        """Return the ``numFound`` value under ``.response`` in ``json_data``."""
        # Number of hits, wrapped in a one-key object by the jq filter.
        wrapped = pyjq.one('.response | {"numFound": .\"numFound\"}', json_data)
        hit_count = wrapped['numFound']
        return hit_count
예제 #32
0
def _prix_argus_zoe(postal, year, mileage):
    """Return ``cote_brute`` from the lacentrale.fr quote endpoint.

    The request is built from the given mileage, postal code and year (the
    month is fixed to "06") and sent with ``argus_headers``.
    """
    argus_url = f"https://www.lacentrale.fr/get_co_prox.php?km={mileage}&zipcode={postal}&month=06&year={year}"
    response = requests.get(argus_url, headers=argus_headers)
    payload = json.loads(response.text)
    return pyjq.one(".cote_brute", payload)
예제 #33
0
    def process_row(self, item, index):
        """Process a row of json data against the key map.

        Builds one output row in three passes: plain key-path lookups from
        ``self.key_map``, an optional row-wise jq script
        (``self.mapprocessing``) whose result is merged into the row, and
        optional per-field jq selectors from ``self.key_processing_map`` for
        headers that are still None.

        Args:
            item: The JSON element to turn into a row.
            index: Row number, exposed to jq scripts as ``$__row__``.

        Returns:
            Dict mapping each header to its value (None when not found).
        """
        row = {}

        # Pass 1: walk each key path into the item; any lookup failure (or an
        # empty key path) yields None for that header.
        for header, keys in self.key_map.items():
            try:
                if keys:
                    row[header] = reduce(operator.getitem, keys, item)
                else:
                    row[header] = None
            except (KeyError, IndexError, TypeError):
                row[header] = None

        
        ######   Map-processing   row-wise   ######
        ### Preferred way to process using JQ (much much more efficient
        ### than field-wise selectors).
        
        # to make custom generated fields available in JQ as $myvar
        jq_params = row.copy()
        jq_params.update(self.context_constants)
        jq_params.update({'__row__': index})
        if self.mapprocessing:
            try:
                computed = jqp.one(self.mapprocessing, item, vars=jq_params)
                row.update(computed)
                # Register any keys the script produced as known headers.
                self.header_keys.update({key: None for key in computed.keys()})
            except Exception as err:
                # A failing script is logged and the row is kept as-is.
                logging.warning(" JQ Error with map-processing JQ script '{}'. Error text: {}".format(self.mapprocessing, err))
        
        
        ######   Individual field-wise JQ selectors   ######
        ### Note: The user should rely mostly on row-wise map-processing
        ###       instead of these field-wise calls. This is left here for
        ###       historical reason since the code was still working.
        ###
        ### Field-wise JQ processing slows down the processing linearly with
        ### the number of rows (and the number of different field-wise).
        ### NB calls to JQ = NB Rows X NB Field-wise
        ###
        ### Design choice: jq scripts DO NOT override default accessors
        ### because accessing using JQ *dramatically* decreases performance
        ### for every call. It also means it is far better to group every JQ
        ### calls unless there is no other choice.
        
        for header, data in self.key_processing_map.items():
            # Only headers that are still None are candidates for a jq selector.
            if jqp and row[header] is None and data is not None:  # row[header] is None:
                try:
                    selector = data.get('jq')
                    args = data.get('args', {})
                    ## NOTE: this causes more variables to be available than
                    ## should be. However it's fine we let user be smart about
                    ## their selector scripts. Internals should not be abused.
                    ## Avoid performance hits
                    jq_params.update(args)
                    
                    selector = self._optimized_jq_selector(selector)
                    if selector:
                        try:
                            tmp = jqp.one(selector, item, vars=jq_params)
                        except Exception as err:
                            logging.warning("Error on key '{}' with JQ '{}'. Error text: {}".format(header, selector, err))
                            tmp = None
                        
                        row[header] = tmp
                except (KeyError, IndexError, TypeError, ValueError):
                    # Problems preparing the selector leave the header untouched.
                    pass

        return row