import os
import sys

# read_yaml, read_json, lines_to_list and printp are project helper functions
# (read_json/write_json live in datamosh.utils elsewhere in this project).

if len(sys.argv) < 2:
    print('You need to pass a YAML config file as an argument.')
    exit()

this_dir = os.path.dirname(os.path.realpath(__file__))

# Configuration
printp('Reading configuration')
cfg_path = os.path.join(this_dir, sys.argv[1])
cfg = read_yaml(cfg_path)
json_out = cfg['json']
input_data = cfg['input_data']
algorithm = cfg['algorithm']
normalisation = cfg['normalisation']

printp('Reading in data')
# analysis_data (the directory holding the analysis JSON) is defined earlier in the original script.
input_data = read_json(os.path.join(analysis_data, input_data))
noise_examples = lines_to_list(os.path.join(this_dir, 'noise_examples.txt'))
good_examples = lines_to_list(os.path.join(this_dir, 'good_examples.txt'))

features = []
label = []

printp('Creating classification labels')
# Build two parallel lists: features holds the data for each example and
# label holds its class (0 = noise) at the same index.
for example in noise_examples:
    try:
        data = input_data[example]
        features.append(data)
        label.append(0)
    except KeyError:
        # Skip any example that is missing from the input data.
        continue
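
# Assumed continuation (the listing cuts the example off here): the good examples are
# presumably labelled 1 in the same way, and the parallel lists stacked into arrays
# for a classifier.
for example in good_examples:
    try:
        features.append(input_data[example])
        label.append(1)
    except KeyError:
        continue

import numpy as np
X = np.array(features)   # feature vectors
y = np.array(label)      # 0 = noise, 1 = good
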
Example 2
from datamosh.utils import read_json, write_json
import os
import csv

f = read_json(
    "/Users/jamesbradbury/dev/data_bending/python_scripts/dimensionality_reduction/outputs/UMAP_2001/umap.json"
)

values = [x for x in f.values()]

master_dict = {"data": []}
print(master_dict)

for value in values:
    t_dict = {}
    t_dict["x"] = value[0]
    t_dict["y"] = value[1]
    master_dict["data"].append(t_dict)

write_json("json_data.json", master_dict)
Example 3
import os
import sys
from shutil import copyfile

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# read_yaml, read_json, printp and check_make are project helpers; project_root is the
# repository root, defined elsewhere in the original script.
this_script = os.path.dirname(os.path.realpath(__file__))

cfg_path = os.path.join(this_script, sys.argv[1])
cfg = read_yaml(cfg_path)
json_out      = cfg['json']
input_data    = cfg['input_data']
algorithm     = cfg['algorithm']
normalisation = cfg['normalisation']
identifier    = cfg['identifier']


folder_name = f'{algorithm}_{identifier}'
output_path = os.path.join(this_script, 'outputs', folder_name)
check_make(output_path)
copyfile(cfg_path, os.path.join(output_path, 'configuration.yaml'))

printp('Reading in data')
feature = read_json(os.path.join(project_root, 'python_scripts', 'dimensionality_reduction', 'outputs', input_data))
keys    = [x for x in feature.keys()]
values  = [y for y in feature.values()]

data = np.array(values)

printp('Normalising')
if normalisation != 'none':
    if normalisation == 'minmax':
        scaler = MinMaxScaler()
    elif normalisation == 'standardise':
        scaler = StandardScaler()
    scaler.fit(data)
    data = scaler.transform(data)

printp('Clustering')
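
# Assumed continuation (the listing cuts the example off here): the configured
# 'algorithm' presumably selects the clusterer. Folder names elsewhere in this project
# ('ahc_250', 'AP_...') suggest agglomerative clustering and affinity propagation, so a
# plausible sketch, with an illustrative cluster count, is:
from datamosh.utils import write_json
from sklearn.cluster import AffinityPropagation, AgglomerativeClustering

if algorithm.startswith('ahc'):
    clusterer = AgglomerativeClustering(n_clusters=250)  # cluster count is illustrative
else:
    clusterer = AffinityPropagation()

cluster_labels = clusterer.fit_predict(data)

# Group the original keys by cluster label and write them next to the copied config.
clusters = {}
for key, lab in zip(keys, cluster_labels):
    clusters.setdefault(str(lab), []).append(key)
write_json(os.path.join(output_path, 'clusters.json'), clusters)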
Example 4
import os
from datamosh.utils import read_json, write_json


this_script = os.path.dirname(os.path.realpath(__file__))

level_one_path   = os.path.join(this_script, 'outputs', 'AP_UMAP-7-1-ahc_250', 'ahc_250.json')
level_two_path   = os.path.join(this_script, 'outputs', 'AP_UMAP-7-1-ahc_500', 'ahc_500.json')
level_three_path = os.path.join(this_script, 'outputs', 'AP_UMAP-7-1-ahc_1600', 'ahc_1600.json')
level_four_path  = os.path.join(this_script, 'outputs', 'AP_UMAP-7-1-ahc_3200', 'ahc_3200.json')

level_one   = read_json(level_one_path)
level_two   = read_json(level_two_path)
level_three = read_json(level_three_path)
level_four  = read_json(level_four_path)


def find_hierarchy(level_one: dict, level_two: dict, file_out: str):
    """Identify relationships between two levels of cluster dictionaries."""
    results = {}

    for parent_cluster in level_one:
        parent_entries = level_one[parent_cluster]
        t_sim = {}
        for children_cluster in level_two:
            children_entries = level_two[children_cluster]
            # Number of entries the child cluster shares with the parent cluster.
            share = len(set(children_entries) & set(parent_entries))
            if share != 0:
                t_sim[children_cluster] = share
        results[parent_cluster] = t_sim

    write_json(file_out, results)
    return results
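
# Illustrative usage (assumed, not part of the original listing): relate each adjacent
# pair of levels and write the mappings where the graph-building example further down
# expects to find them.
relationships_dir = os.path.join(this_script, 'cluster_relationships')
os.makedirs(relationships_dir, exist_ok=True)

find_hierarchy(level_one, level_two, os.path.join(relationships_dir, 'cl_one.json'))
find_hierarchy(level_two, level_three, os.path.join(relationships_dir, 'cl_two.json'))
find_hierarchy(level_three, level_four, os.path.join(relationships_dir, 'cl_three.json'))
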
Example 5

# The same setup as Example 3 is assumed here: os, sys, shutil.copyfile, numpy, the
# sklearn scalers and the project helpers (read_yaml, read_json, printp, check_make),
# plus the this_script and project_root paths.
cfg_path = os.path.join(this_script, sys.argv[1])
cfg = read_yaml(cfg_path)
json_out = cfg['json']
input_data = cfg['input_data']
pre_reduction = cfg['pre_reduction']
normalisation = cfg['normalisation']
algorithm = cfg['algorithm']
identifier = cfg['identifier']
components = cfg['components']

folder_name = f'{algorithm}_{identifier}'
output_path = os.path.join(this_script, 'outputs', folder_name)
check_make(output_path)
copyfile(cfg_path, os.path.join(output_path, 'configuration.yaml'))

feature = read_json(os.path.join(project_root, input_data))

data = [v for v in feature.values()]
keys = [k for k in feature.keys()]

data = np.array(data)

if normalisation == 'minmax':
    printp('Normalising input data')
    scaler = MinMaxScaler()
elif normalisation == 'standardise':
    printp('Standardising input data')
    scaler = StandardScaler()
else:
    scaler = None  # no normalisation requested

if scaler is not None:
    data = scaler.fit_transform(data)

######### Initial Reduction ##########
if pre_reduction != 0:
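    # Assumed continuation (the listing cuts this example off): a non-zero
    # 'pre_reduction' plausibly means an initial PCA down to that many dimensions
    # before the main reduction step. A sketch of that assumption:
    from sklearn.decomposition import PCA

    printp('Pre-reducing data')
    data = PCA(n_components=pre_reduction).fit_transform(data)
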
Example 6

import os
import networkx as nx
from datamosh.utils import read_json

this_script = os.path.dirname(os.path.realpath(__file__))
cluster_relationships = os.path.join(this_script, 'cluster_relationships')

g = nx.Graph()


layer_1 = read_json(os.path.join(cluster_relationships, 'cl_one.json'))
layer_2 = read_json(os.path.join(cluster_relationships, 'cl_two.json'))
layer_3 = read_json(os.path.join(cluster_relationships, 'cl_three.json'))

children_1 = layer_1.keys()

# Each relationship file maps a parent cluster to the child clusters it shares entries
# with, so connect every parent node to its children. (The original is cut off at this
# point; the edge structure below is an assumption based on that mapping.)
for child_1 in children_1:
    g.add_node(child_1)
    children_2 = layer_1[child_1]
    for child_2 in children_2:
        g.add_edge(child_1, child_2)
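
# Illustrative follow-up (assumed, not in the original listing): summarise the hierarchy
# graph and export it for inspection, e.g. in Gephi.
print(f'{g.number_of_nodes()} nodes, {g.number_of_edges()} edges')
nx.write_gexf(g, os.path.join(this_script, 'cluster_hierarchy.gexf'))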