Example #1
def make_preprocess():
    '''
        Read interim.csv and perform further cleaning:
        1. Parse StartTime as a datetime
        2. Bin source and destination ports
        3. Add an attribute indicating the direction of flow
        4. Write the result to preprocessed.csv
    '''
    config = load_yaml(CONFIG_PATH)
    interim_output_path = config['interim_output_path']
    preprocessed_output_path = config['preprocessed_output_path']
    proto_dict = load_json(config['proto_dict_path'])
    dir_dict = load_json(config['dir_dict_path'])
    state_dict = load_json(config['state_dict_path'])
    # Well-known ports range from 0 through 1023
    # Registered ports are 1024 to 49151
    # Dynamic ports (also called private ports) are 49152 to 65535
    port_bins = [0, 1023, 49151, 65535]
    port_labels = [0, 1, 2]

    interim_df = pd.read_csv(interim_output_path, sep=',', escapechar='\\')
    preprocessed_df = interim_df
    preprocessed_df['StartTime'] = pd.to_datetime(preprocessed_df['StartTime'])

    preprocessed_df['Proto_Int'] = preprocessed_df['Proto'].map(proto_dict)
    preprocessed_df['Proto_Int'] = preprocessed_df['Proto_Int'].fillna(
        proto_dict['Unknown'])
    preprocessed_df['Proto_Int'] = preprocessed_df['Proto_Int'].astype(
        'category')

    preprocessed_df['Sport_Int'] = pd.cut(preprocessed_df['Sport'],
                                          bins=port_bins,
                                          labels=port_labels,
                                          include_lowest=True)
    preprocessed_df['Sport_Int'] = preprocessed_df['Sport_Int'].astype(
        'category')

    preprocessed_df['Dir_Int'] = preprocessed_df['Dir'].map(dir_dict)
    preprocessed_df['Dir_Int'] = preprocessed_df['Dir_Int'].fillna(
        dir_dict['Unknown'])
    preprocessed_df['Dir_Int'] = preprocessed_df['Dir_Int'].astype('category')

    preprocessed_df['Dport_Int'] = pd.cut(preprocessed_df['Dport'],
                                          bins=port_bins,
                                          labels=port_labels,
                                          include_lowest=True)
    preprocessed_df['Dport_Int'] = preprocessed_df['Dport_Int'].astype(
        'category')

    preprocessed_df['State_Int'] = preprocessed_df['State'].map(state_dict)
    preprocessed_df['State_Int'] = preprocessed_df['State_Int'].fillna(
        state_dict['Unknown'])
    preprocessed_df['State_Int'] = preprocessed_df['State_Int'].astype(
        'category')

    # Flag flows whose source port is outside the well-known range (>= 1024)
    preprocessed_df['is_fwd'] = preprocessed_df['Sport']
    preprocessed_df.loc[preprocessed_df['Sport'] >= 1024, 'is_fwd'] = 1
    preprocessed_df.loc[preprocessed_df['Sport'] < 1024, 'is_fwd'] = 0

    makedirs(dirname(preprocessed_output_path), exist_ok=True)
    preprocessed_df.to_csv(preprocessed_output_path, index=False)
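The port binning above relies on pd.cut with three bins covering well-known, registered, and dynamic ports. A minimal standalone sketch of that same call on a few hypothetical port values (only pandas is assumed):

import pandas as pd

# Hypothetical source ports spanning the three ranges described above
ports = pd.Series([22, 80, 8080, 49152, 65535])

port_bins = [0, 1023, 49151, 65535]
port_labels = [0, 1, 2]

# include_lowest=True keeps port 0 inside the first (well-known) bin
binned = pd.cut(ports, bins=port_bins, labels=port_labels, include_lowest=True)
print(binned.tolist())  # [0, 0, 1, 2, 2]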
Example #2
def update_entity_details(folder_name, file_regex, output_path):
    '''Aggregate parent/child links from dumped entity files and export graph and tree summaries.'''
    file_names = file_util.get_file_name_in_dir_regex(folder_name, file_regex)
    link_data = {}
    parent_of_leaf = []
    all_entities_from_mention = {}
    for file_name in file_names:
        print("file_name", file_name)
        entity_dict = file_util.load(file_name)
        # print(entity_dict)
        for entity_id in entity_dict:
            all_entities_from_mention[entity_id] = entity_dict[entity_id]
            linkto_infos = entity_dict[entity_id]["parents"]
            for linkto_info in linkto_infos:
                source_id = linkto_info['id']
                dest_id = linkto_info['link_to']
                if source_id == entity_id:
                    parent_of_leaf.append(dest_id)
                else:
                    parent_of_leaf.append(source_id)
                    parent_of_leaf.append(dest_id)
                link_data[source_id] = link_data.get(source_id, [])
                link_data[dest_id] = link_data.get(dest_id, [])
                if dest_id not in link_data[source_id] and dest_id != '':
                    link_data[source_id].append(dest_id)
    file_util.dump(link_data,
                   output_path + ".pck")  # "iteration3_data_dumped.pck"
    file_util.dump(parent_of_leaf, output_path + "_parent_leaf.pck")
    file_util.dump_json(link_data, output_path + ".json")
    des_short_name_dict = update_entity_description_shortname(
        link_data, all_entities_from_mention)
    file_util.dump_json(des_short_name_dict, output_path + "_brief.json")
    wiki_graph_util.convert_to_tree(link_data, des_short_name_dict)
    file_util.dump_json(all_entities_from_mention,
                        output_path + "_patent_entity_relations.json")
    excel_tree_level_export.demo(file_util.load_json("all_entity_level.json"))
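The loop above accumulates link_data as an adjacency map from each entity id to the ids it links to, and parent_of_leaf as a flat list of parent ids. A self-contained sketch of that accumulation pattern on hypothetical in-memory data (the real entity files and file_util are not needed here):

# Hypothetical entity dict shaped like the files loaded above
entity_dict = {
    'Q1': {'parents': [{'id': 'Q1', 'link_to': 'Q10'}]},
    'Q2': {'parents': [{'id': 'Q5', 'link_to': 'Q10'}]},
}

link_data = {}
parent_of_leaf = []
for entity_id, entity in entity_dict.items():
    for link in entity['parents']:
        source_id, dest_id = link['id'], link['link_to']
        if source_id == entity_id:
            parent_of_leaf.append(dest_id)
        else:
            parent_of_leaf.extend([source_id, dest_id])
        link_data.setdefault(source_id, [])
        link_data.setdefault(dest_id, [])
        if dest_id and dest_id not in link_data[source_id]:
            link_data[source_id].append(dest_id)

print(link_data)       # {'Q1': ['Q10'], 'Q10': [], 'Q5': ['Q10']}
print(parent_of_leaf)  # ['Q10', 'Q5', 'Q10']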
Example #3
def make_raw_data():
    '''Create input.csv in the project/data/raw/ directory.'''
    config = load_yaml(CONFIG_PATH)
    binetflow_path = config['binet_output_path']
    raw_output_path = config['raw_output_path']
    dataset_path = config['dataset_path']
    dataset_json = load_json(dataset_path)
    dict_mal_hosts = dict_infected_hosts(dataset_json)
    file_list = get_file_list(binetflow_path)
    create_input_csv(file_list, binetflow_path, raw_output_path,
                     dict_mal_hosts)
Example #4
    def get_mystic_codes(cls):
        mystic = []

        for mystic_file in glob(pathjoin(cls.MYSTIC_PATH, '*.json')):
            mystic_json = load_json(mystic_file)

            mystic.append({
                'name': mystic_json.get('name'),
                'description': mystic_json.get('description', ''),
            })

        return mystic
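get_mystic_codes globs MYSTIC_PATH for *.json files and collects each file's name and description. A standalone sketch of the same pattern using only the standard library (the path argument and the json.load call are assumptions, not the project's actual load_json helper):

import json
from glob import glob
from os.path import join as pathjoin

def list_mystic_codes(mystic_path):
    '''Collect name/description pairs from every *.json file in mystic_path.'''
    codes = []
    for mystic_file in glob(pathjoin(mystic_path, '*.json')):
        with open(mystic_file, encoding='utf-8') as fh:
            data = json.load(fh)
        codes.append({
            'name': data.get('name'),
            'description': data.get('description', ''),
        })
    return codes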
Example #5
def load(profile, log):
    if log:
        log_util.set_file_logger(log)
    else:
        log_util.set_std_logger()

    input_params = load_json(profile)

    if 'mystic' not in input_params:
        click.echo('Invalid profile')
    else:
        tohsaka = Tohsaka(input_params.pop('mystic'), input_params)
        tohsaka.go()
Example #6
    def test_weather(self):
        FILENAME = 'vancouver'
        tohsaka = Tohsaka(
            'weather', {
                'appid': os.environ['OPENWEATHER_TOKEN'],
                'city': 'vancouver',
                'country': 'ca',
                'output_file': FILENAME,
                'folder': tempfile.gettempdir()
            })

        tohsaka.go()

        result = load_json(
            pathjoin(tohsaka.outputter.output_folder, FILENAME + '.json'))

        assert result
        assert 'city' in result[0]
        assert 'cnt' in result[0]
Example #7
def get_dataset_json(file_path):
    '''Returns the json for downloading the dataset'''
    return load_json(file_path)
Example #8
if __name__ == "__main__":
    # Load data
    numpy_image = process_image(args.input_image_dir, T_RESIZE_CROP)

    # Load checkpoints
    checkpoint = load_checkpoint(args.checkpoint_filepath)

    # Restore model
    model = reconstruct_model(checkpoint)

    # Prediction
    probs, classes = predict(numpy_image, model, args.top_k, args.gpu)

    # Present results
    cat_to_id_map = None
    if args.category_names:
        cat_to_id_map = load_json(args.category_names)

    print("\nResults for image '{}':".format(args.input_image_dir))
    prob_class_id_tuple_list = sorted([(p, c) for p, c in zip(probs, classes)],
                                      key=lambda t: t[0],
                                      reverse=True)
    for i, (probability, class_id) in enumerate(prob_class_id_tuple_list):
        if cat_to_id_map is not None:
            class_label = cat_to_id_map[str(class_id)] + " ({})".format(
                class_id)
        else:
            class_label = "(Class id: {})".format(class_id)
        print("  {}. {} % - {}".format(i, np.round(probability * 100, 2),
                                       class_label))
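The result loop above ranks (probability, class) pairs by descending probability before printing. A small self-contained sketch of that ranking step with hypothetical values:

import numpy as np

probs = [0.05, 0.72, 0.23]
classes = [3, 17, 8]

ranked = sorted(zip(probs, classes), key=lambda t: t[0], reverse=True)
for rank, (probability, class_id) in enumerate(ranked, start=1):
    print("  {}. {} % - (Class id: {})".format(rank, np.round(probability * 100, 2), class_id))
# 1. 72.0 % - (Class id: 17)
# 2. 23.0 % - (Class id: 8)
# 3. 5.0 % - (Class id: 3)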
Example #9
        entities = wiki_util.get_wiki_id_from_text(word, entity_dict, iter_num)
        if singu_word != word:
            entities.extend(
                wiki_util.get_wiki_id_from_text(singu_word, entity_dict,
                                                iter_num))
        if len(entities) == 0:
            not_found_entity.append(word)
        file_util.dump(entity_dict, output_entity_file)
        file_util.dump(not_found_entity, not_wiki_output)
        print(i, '/', total, ')')  #, word, '###', entities, '###'
    # file_util.dump(entity_dict, "entities_dict_wth_lvl.pck")
    file_util.dump(entity_dict, output_entity_file)
    file_util.dump(not_found_entity, not_wiki_output)


if __name__ == "__main__":
    choice = int(sys.argv[1])
    if not choice:  #choice=0 folder_name, start, end, iteration
        # python3 sony_patent_evaluation/test/crawl_wiki_tree.py 0 entity_folder_09122019 0 10 2
        search_wiki_with_threads(sys.argv[2], int(sys.argv[3]),
                                 int(sys.argv[4]), int(sys.argv[5]))
    elif choice == 1:
        #python3 sony_patent_evaluation/test/crawl_wiki_tree.py 1 "entity_folder_09122019" "_dict_iteration.pck" "09_12_2019"
        update_entity_details(sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        #python3 sony_patent_evaluation/test/crawl_wiki_tree.py 2 "entity_folder_03122019" "_dict_iteration.pck" "07_12_2019"
        excel_tree_level_export.demo(
            file_util.load_json("all_entity_level.json"))
# update_entity_details("entity_folder_09122019", "_dict_iteration.pck", "09_12_2019")
Example #10
    def load_mystic_code(cls, mystic_code):
        filepath = pathjoin(cls.MYSTIC_PATH, mystic_code + '.json')

        return load_json(filepath)