Code example #1
from os import makedirs
from os.path import dirname

import pandas as pd

# load_yaml, load_json, and CONFIG_PATH are project-level helpers/constants
# assumed to be in scope.
def make_preprocess():
    '''
        Read interim.csv and clean the data further:
        1. Read StartTime as DateTime
        2. Perform binning on source and destination ports
        3. Add attribute indicating direction of flow
        4. Write to preprocessed.csv
    '''
    config = load_yaml(CONFIG_PATH)
    interim_output_path = config['interim_output_path']
    preprocessed_output_path = config['preprocessed_output_path']
    proto_dict = load_json(config['proto_dict_path'])
    dir_dict = load_json(config['dir_dict_path'])
    state_dict = load_json(config['state_dict_path'])
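    # proto_dict, dir_dict, and state_dict map raw categorical strings to
    # integer codes; each provides an 'Unknown' key used as a fallback below.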
    # Well-known ports range from 0 through 1023
    # Registered ports are 1024 to 49151
    # Dynamic ports (also called private ports) are 49152 to 65535
    port_bins = [0, 1023, 49151, 65535]
    port_labels = [0, 1, 2]

    interim_df = pd.read_csv(interim_output_path, sep=',', escapechar='\\')
    preprocessed_df = interim_df.copy()
    preprocessed_df['StartTime'] = pd.to_datetime(preprocessed_df['StartTime'])

    preprocessed_df['Proto_Int'] = preprocessed_df['Proto'].map(proto_dict)
    # fillna returns a new Series, so assign the result back
    preprocessed_df['Proto_Int'] = preprocessed_df['Proto_Int'].fillna(
        proto_dict['Unknown'])
    preprocessed_df['Proto_Int'] = preprocessed_df['Proto_Int'].astype(
        'category')

    preprocessed_df['Sport_Int'] = pd.cut(preprocessed_df['Sport'],
                                          bins=port_bins,
                                          labels=port_labels,
                                          include_lowest=True)
    preprocessed_df['Sport_Int'] = preprocessed_df['Sport_Int'].astype(
        'category')

    preprocessed_df['Dir_Int'] = preprocessed_df['Dir'].map(dir_dict)
    preprocessed_df['Dir_Int'] = preprocessed_df['Dir_Int'].fillna(
        dir_dict['Unknown'])
    preprocessed_df['Dir_Int'] = preprocessed_df['Dir_Int'].astype('category')

    preprocessed_df['Dport_Int'] = pd.cut(preprocessed_df['Dport'],
                                          bins=port_bins,
                                          labels=port_labels,
                                          include_lowest=True)
    preprocessed_df['Dport_Int'] = preprocessed_df['Dport_Int'].astype(
        'category')

    preprocessed_df['State_Int'] = preprocessed_df['State'].map(state_dict)
    preprocessed_df['State_Int'] = preprocessed_df['State_Int'].fillna(
        state_dict['Unknown'])
    preprocessed_df['State_Int'] = preprocessed_df['State_Int'].astype(
        'category')

    # is_fwd: 1 when the source port is >= 1024, else 0 (direction-of-flow
    # indicator from step 3 of the docstring).
    preprocessed_df['is_fwd'] = (preprocessed_df['Sport'] >= 1024).astype(int)

    makedirs(dirname(preprocessed_output_path), exist_ok=True)
    preprocessed_df.to_csv(preprocessed_output_path, index=False)
Code example #2
def update_entity_details(folder_name, file_regex, output_path):
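    '''Aggregate per-file entity dicts into a global parent-link graph and
    export it as pickle, JSON, and an Excel tree.'''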
    file_names = file_util.get_file_name_in_dir_regex(folder_name, file_regex)
    link_data = {}
    parent_of_leaf = []
    all_entities_from_mention = {}
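    # Walk every matching entity file, collecting each entity and its
    # parent links into the global structures above.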
    for file_name in file_names:
        print("file_name", file_name)
        entity_dict = file_util.load(file_name)
        for entity_id in entity_dict:
            all_entities_from_mention[entity_id] = entity_dict[entity_id]
            linkto_infos = entity_dict[entity_id]["parents"]
            for linkto_info in linkto_infos:
                source_id = linkto_info['id']
                dest_id = linkto_info['link_to']
                if source_id == entity_id:
                    parent_of_leaf.append(dest_id)
                else:
                    parent_of_leaf.append(source_id)
                    parent_of_leaf.append(dest_id)
                link_data[source_id] = link_data.get(source_id, [])
                link_data[dest_id] = link_data.get(dest_id, [])
                if dest_id not in link_data[source_id] and dest_id != '':
                    link_data[source_id].append(dest_id)
    file_util.dump(link_data,
                   output_path + ".pck")  # "iteration3_data_dumped.pck"
    file_util.dump(parent_of_leaf, output_path + "_parent_leaf.pck")
    file_util.dump_json(link_data, output_path + ".json")
    des_short_name_dict = update_entity_description_shortname(
        link_data, all_entities_from_mention)
    file_util.dump_json(des_short_name_dict, output_path + "_brief.json")
    wiki_graph_util.convert_to_tree(link_data, des_short_name_dict)
    file_util.dump_json(all_entities_from_mention,
                        output_path + "_patent_entity_relations.json")
    excel_tree_level_export.demo(file_util.load_json("all_entity_level.json"))
Code example #3
def make_raw_data():
    ''' create input.csv in project/data/raw/ directory '''
    config = load_yaml(CONFIG_PATH)
    binetflow_path = config['binet_output_path']
    raw_output_path = config['raw_output_path']
    dataset_path = config['dataset_path']
    dataset_json = load_json(dataset_path)
    dict_mal_hosts = dict_infected_hosts(dataset_json)
    file_list = get_file_list(binetflow_path)
    create_input_csv(file_list, binetflow_path, raw_output_path,
                     dict_mal_hosts)
Code example #4
File: tohsaka.py Project: ye11ow/tohsaka
    @classmethod
    def get_mystic_codes(cls):
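        '''Collect the name and description from every mystic-code JSON
        under cls.MYSTIC_PATH.'''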
        mystic = []

        for mystic_file in glob(pathjoin(cls.MYSTIC_PATH, '*.json')):
            mystic_json = load_json(mystic_file)

            mystic.append({
                'name': mystic_json.get('name'),
                'description': mystic_json.get('description', ''),
            })

        return mystic
Code example #5
def load(profile, log):
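    '''Load a JSON profile, set up logging, and run the Tohsaka job the
    profile describes.'''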
    if log:
        log_util.set_file_logger(log)
    else:
        log_util.set_std_logger()

    input_params = load_json(profile)

    if 'mystic' not in input_params:
        click.echo('Invalid profile')
    else:
        tohsaka = Tohsaka(input_params.pop('mystic'), input_params)
        tohsaka.go()
Code example #6
File: test_weather.py Project: ye11ow/tohsaka
    def test_weather(self):
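        '''Run the weather mystic end to end and check the resulting JSON
        output.'''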
        FILENAME = 'vancouver'
        tohsaka = Tohsaka(
            'weather', {
                'appid': os.environ['OPENWEATHER_TOKEN'],
                'city': 'vancouver',
                'country': 'ca',
                'output_file': FILENAME,
                'folder': tempfile.gettempdir()
            })

        tohsaka.go()

        result = load_json(
            pathjoin(tohsaka.outputter.output_folder, FILENAME + '.json'))

        assert result
        assert 'city' in result[0]
        assert 'cnt' in result[0]
Code example #7
def get_dataset_json(file_path):
    '''Returns the json for downloading the dataset'''
    return load_json(file_path)
Code example #8
if __name__ == "__main__":
    # Load data
    numpy_image = process_image(args.input_image_dir, T_RESIZE_CROP)

    # Load checkpoints
    checkpoint = load_checkpoint(args.checkpoint_filepath)

    # Restore model
    model = reconstruct_model(checkpoint)

    # Prediction
    probs, classes = predict(numpy_image, model, args.top_k, args.gpu)

    # Present results
    cat_to_id_map = None
    if args.category_names:
        cat_to_id_map = load_json(args.category_names)

    print("\nResults for image '{}':".format(args.input_image_dir))
    prob_class_id_tuple_list = sorted(zip(probs, classes),
                                      key=lambda t: t[0],
                                      reverse=True)
    for rank, (probability, class_id) in enumerate(prob_class_id_tuple_list,
                                                   start=1):
        if cat_to_id_map is not None:
            class_label = cat_to_id_map[str(class_id)] + " ({})".format(
                class_id)
        else:
            class_label = "(Class id: {})".format(class_id)
        print("  {}. {} % - {}".format(rank, np.round(probability * 100, 2),
                                       class_label))
Code example #9
        entities = wiki_util.get_wiki_id_from_text(word, entity_dict, iter_num)
        if singu_word != word:
            entities.extend(
                wiki_util.get_wiki_id_from_text(singu_word, entity_dict,
                                                iter_num))
        if len(entities) == 0:
            not_found_entity.append(word)
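        # Dump both structures every iteration so progress survives an
        # interruption; the dumps after the loop repeat this one final time.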
        file_util.dump(entity_dict, output_entity_file)
        file_util.dump(not_found_entity, not_wiki_output)
        print(i, '/', total)
    file_util.dump(entity_dict, output_entity_file)
    file_util.dump(not_found_entity, not_wiki_output)


if __name__ == "__main__":
    choice = int(sys.argv[1])
    if not choice:  # choice == 0: folder_name, start, end, iteration
        # python3 sony_patent_evaluation/test/crawl_wiki_tree.py 0 entity_folder_09122019 0 10 2
        search_wiki_with_threads(sys.argv[2], int(sys.argv[3]),
                                 int(sys.argv[4]), int(sys.argv[5]))
    elif choice == 1:
        # python3 sony_patent_evaluation/test/crawl_wiki_tree.py 1 "entity_folder_09122019" "_dict_iteration.pck" "09_12_2019"
        update_entity_details(sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        # python3 sony_patent_evaluation/test/crawl_wiki_tree.py 2
        excel_tree_level_export.demo(
            file_util.load_json("all_entity_level.json"))
Code example #10
File: tohsaka.py Project: ye11ow/tohsaka
    @classmethod
    def load_mystic_code(cls, mystic_code):
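        '''Load the JSON definition of a single mystic code by name.'''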
        filepath = pathjoin(cls.MYSTIC_PATH, mystic_code + '.json')

        return load_json(filepath)