Example #1
def experiment2(clusters_number, lists_number):
    reader = DataReader()  # "../data/test1.json"

    for i in range(lists_number - 1, lists_number):

        for j in range(0, 2):
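            # j == 0 uses custom initial centroids; j == 1 uses sklearn's
            # built-in k-means++ initialization.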

            input_matrix = reader.get_matrix_by_list(i)

            if j == 1:
                kmeans = KMeans(n_clusters=clusters_number, init='k-means++')
            else:
                centroids = create_initial_centroids(clusters_number,
                                                     input_matrix)
                kmeans = KMeans(n_clusters=clusters_number,
                                n_init=1,
                                init=centroids)

            clasterization = kmeans.fit_predict(input_matrix)

            sets = users_index_sets_to_users_sets(
                clusters_list_to_users_index_sets(clasterization), reader)
            print("\nClasterization by list %s" % i)
            show_users_sets(sets)

            out = OutputWriter()
            out.write_rewrite(
                OutFiles.centroids_custom
                if j == 0 else OutFiles.centroids_embedded, "")
            print("Metrics:")
            for user_set in sets:
                m = TeamMetric(user_set)
Example #2
def read_row(filenames):
    """Read a row of data from list of H5 files"""
    reader = DataReader(filenames)
    x, y, s = reader.read_row_tf()
    x.set_shape((3, 160, 320))
    y.set_shape(1)
    s.set_shape(1)
    return x, y, s
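
# Hypothetical usage sketch (not from the original source): read_row returns
# per-sample tensors, so in a TF 1.x queue-based input pipeline they could be
# batched with tf.train.batch. The H5 file name below is made up.
import tensorflow as tf

x, y, s = read_row(["example_drive_log.h5"])
x_batch, y_batch, s_batch = tf.train.batch([x, y, s], batch_size=32)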
Example #3
def agglomerative_vs_pc(teams_number):
    need_balance = False

    for variant in variants:
        reader = DataReader(variant)

        for teams_number in range(2, 10):  # overrides the teams_number argument
            clusterize_and_compare_by_desires(reader, teams_number,
                                              need_balance)
Example #4
def experiment1(clusters_number, lists_number):
    reader = DataReader()  #"../data/test1.json"
    kmeans = KMeans(n_clusters=clusters_number)
    clasterizations = []
    for i in range(0, lists_number):
        clasterization = kmeans.fit_predict(reader.get_matrix_by_list(i))
        sets = users_index_sets_to_users_sets(
            clusters_list_to_users_index_sets(clasterization), reader)
        print("Clasterisation by list %s" % i)
        show_users_sets(sets)
        for user_set in sets:
            m = TeamMetric(user_set)
            print("Metric is: " + str(m))
Example #5
def main():
    train_path = 'C:/Users/dronp/Documents/TPC/train'
    train_filename = 'gold_labels.txt'
    data, ids = DataReader().read_gold_data(train_path, train_filename)

    texts = [example[0] for example in data]
    labels = [example[1] for example in data]

    solution = Solution()
    predicted = solution.predict(texts)

    accuracy_evaluator = AccuracyEvaluator()
    accuracy_evaluator.evaluate(labels, predicted)

    print(quality(predicted, labels))
Example #6
def complete_vs_avg(teams_number):
    for variant in variants:
        reader = DataReader(variant)

        def form_line(metric: ClusteringMetric):
            return "{},{},{}".format(metric.average_metric, metric.min_metric,
                                     metric.max_metric)

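        # "complete" linkage merges clusters by their largest pairwise distance,
        # while "average" linkage uses the mean pairwise distance.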
        clustering_alg = UsersAgglomerativeClustering(reader, teams_number)
        clustering_alg.linkage = "complete"
        sets = clustering_alg.clusterize()

        complete = ClusteringMetric(sets)

        clustering_alg = UsersAgglomerativeClustering(reader, teams_number)
        clustering_alg.linkage = "average"
        sets = clustering_alg.clusterize()

        average = ClusteringMetric(sets)

        print(form_line(average) + "," + form_line(complete))
Example #7
import pickle
import sys  # required by the sys.exit(1) call below
import numpy as np
import scipy.misc
from utils.data_reader import DataReader

#def readRecords(path):

if __name__ == "__main__":
    mainFolder = "/home/ali/SharedFolder/detector_test/unetOptimization" \
                 "/measurement_campaign_20200430/data/"
    imageFolder = mainFolder + "imgs/"
    powerFolder = mainFolder + "powers/"
    assocFolder = mainFolder + "assoc/"
    path = mainFolder + \
           "res_UL_HP10m-K16-M128-sh0_Opt(IEQpower-lmda0.0,maxMinSNR,UL-bisec-Power(lp)-IPAP(iib)-isRoun0,sci-int,sci-int,1229-76-0,InitAssMat-sta-205).pkl"
    dataSet = DataReader(path)
    sys.exit(1)  # early exit; everything below is unreachable as written

    with open(path, "rb") as file:
        data = pickle.load(file)
    numIterations = data['iiter']
    for i in np.arange(0, numIterations, 1):
        imageFile = "sample_" + str(i)
        associationMatrix = \
            data['Ipap'][i]['APschdul'][-1]['switch_mat']
        roundedAssociationMatrix = (np.around(associationMatrix, decimals=0))
        beta = np.log10(data['lscale_beta'][i])
        allocatedPower = data['pload'][i]['zzeta_opt']

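        # Note: scipy.misc.toimage was removed in SciPy 1.2, so the lines below
        # assume an older SciPy release with Pillow installed.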
        scipy.misc.toimage(beta).save(imageFile + ".jpg")
        scipy.misc.toimage(associationMatrix).save(imageFile + "_mask.jpg")
Example #8
import multiprocessing as mp
import fasttext as ft
import csv, os
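
# DataReader and Evaluator are project-specific helpers; their imports are
# omitted in this snippet.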

FOLDER = "fasttext_tool/"
def saveInfoToFile(row, output):
    output.write("__label__{} {}\n".format(row['polarity'], str(row['text'])))
    return ""

def adjustForm(dataSet, fileName):
    print("Transforming...")
    with open('{}{}'.format(FOLDER, fileName), 'w+') as output:
        dataSet.apply(lambda x: saveInfoToFile(x, output), axis=1)

if __name__ == "__main__":
    dataReader = DataReader()
    evaluator = Evaluator()
    if not "data.train" in os.listdir(FOLDER):
        dataSet = dataReader.read_data_set()
        adjustForm(dataSet, "data.train")
    if not "data.test" in os.listdir(FOLDER):
        testSet = dataReader.read_test_set()
        adjustForm(testSet, "data.test")
    if not "model.bin" in os.listdir(FOLDER):
        model = ft.train_supervised(input=FOLDER + "data.train")
        model.save_model(FOLDER + "model.bin")
    else:
        model = ft.load_model(FOLDER + "model.bin")
    (_, precision, recall) = model.test(FOLDER + "data.test")
    metrics = {'precision': precision, 'recall': recall, 'fscore': evaluator.calculate_fscore(precision, recall)}
    metrics_str = evaluator.getString(metrics)
Example #9
def train_net(net,
              device,
              epochs=5,
              batch_size=1,
              lr=0.001,
              val_percent=0.1,
              save_cp=True,
              img_scale=0.5,
              mode=0,
              alpha=.5):
    #mode=0 ==> AP-scheduling only
    #mode=1 ==> power allocation only
    #mode=2 ==> joint AP-scheduling and power allocation
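    # rawPath and dir_checkpoint are module-level globals defined elsewhere in
    # the original script.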
    dataset = DataReader(rawPath, mode)
    n_val = int(len(dataset) * val_percent)
    n_train = len(dataset) - n_val
    train, val = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_loader = DataLoader(val,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=True)
    print("Number of training instances=" + str(n_train))
    print("Number of validation instances=" + str(n_val))

    writer = SummaryWriter(
        comment=f'LR_{lr}_BS_{batch_size}_SCALE_{img_scale}')
    global_step = 0

    logging.info(f'''Starting training:
        Epochs:          {epochs}
        Batch size:      {batch_size}
        Learning rate:   {lr}
        Training size:   {n_train}
        Validation size: {n_val}
        Checkpoints:     {save_cp}
        Device:          {device.type}
        Images scaling:  {img_scale}
    ''')

    #Define optimizer and scheduler
    optimizer = optim.RMSprop(net.parameters(),
                              lr=lr,
                              weight_decay=1e-8,
                              momentum=0.9)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min' if net.n_classes > 1 else 'max', patience=2)

    criterionAP = nn.BCEWithLogitsLoss()
    criterionPower = nn.MSELoss()

    #Main loop over epochs
    for epoch in range(epochs):
        net.train()

        epoch_loss = 0
        with tqdm(total=n_train,
                  desc=f'Epoch {epoch + 1}/{epochs}',
                  unit='img') as pbar:
            for batch in train_loader:
                imgs = batch['image']
                true_masks = batch['mask']
                true_powers = batch['power']
                assert imgs.shape[1] == net.n_channels, \
                    f'Network has been defined with {net.n_channels} input channels, ' \
                    f'but loaded images have {imgs.shape[1]} channels. Please check that ' \
                    'the images are loaded correctly.'

                imgs = imgs.to(device=device, dtype=torch.float32)
                mask_type = torch.float32 if net.n_classes == 1 else torch.long
                power_type = torch.float32  # MSELoss expects floating-point targets
                true_masks = true_masks.to(device=device, dtype=mask_type)
                true_powers = true_powers.to(device=device, dtype=power_type)

                masks_pred = net(imgs)

                if "AP" in masks_pred:
                    lossAP = criterionAP(masks_pred["AP"], true_masks)
                if "Power" in masks_pred:
                    lossPower = criterionPower(masks_pred["Power"],
                                               true_powers)
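                # Pick the loss according to the training mode; mode 2 blends
                # both objectives with weight alpha.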
                if mode == 0:
                    loss = lossAP
                elif mode == 1:
                    loss = lossPower
                else:
                    loss = alpha * lossAP + (1 - alpha) * lossPower

                epoch_loss += loss.item()
                writer.add_scalar('Loss/train', loss.item(), global_step)

                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_value_(net.parameters(), 0.1)
                optimizer.step()

                pbar.update(imgs.shape[0])
                global_step += 1
                if global_step % (len(dataset) // (10 * batch_size)) == 0:
                    for tag, value in net.named_parameters():
                        tag = tag.replace('.', '/')
                        writer.add_histogram('weights/' + tag,
                                             value.data.cpu().numpy(),
                                             global_step)
                        writer.add_histogram('grads/' + tag,
                                             value.grad.data.cpu().numpy(),
                                             global_step)
                    val_score_AP, val_score_Power = eval_net_AP_Power(
                        net, val_loader, device)
                    if mode == 0:
                        val_score = val_score_AP
                    elif mode == 1:
                        val_score = val_score_Power
                    else:
                        val_score = alpha * val_score_AP + (
                            1 - alpha) * val_score_Power
                    scheduler.step(val_score)
                    writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'],
                                      global_step)

                    if net.n_classes > 1:
                        logging.info(
                            'Validation cross entropy: {}'.format(val_score))
                        writer.add_scalar('Loss/test', val_score, global_step)
                    else:
                        logging.info(
                            'Validation Dice Coeff: {}'.format(val_score))
                        writer.add_scalar('Dice/test', val_score, global_step)

                    writer.add_images('images', imgs, global_step)
                    if net.n_classes == 1:
                        writer.add_images('masks/true', true_masks,
                                          global_step)
                        writer.add_images('masks/pred',
                                          torch.sigmoid(masks_pred) > 0.5,
                                          global_step)

        if save_cp:
            try:
                os.mkdir(dir_checkpoint)
                logging.info('Created checkpoint directory')
            except OSError:
                pass
            torch.save(net.state_dict(),
                       dir_checkpoint + f'CP_epoch{epoch + 1}.pth')
            logging.info(f'Checkpoint {epoch + 1} saved !')

    writer.close()
Example #10
# Gave good metric
# [6,13,20,24]
# [7,14,15,17]
# [9,12,18,19]
# [4,10,11,25]
# [5,16,21,22]

# The best clustering ever
# [10,20,18,9]
# [16,15,19,22]
# [17,14,7,5]
# [24,25,4,13]
# [6,11,12,21]

if __name__ == '__main__':
    reader = DataReader("../data/ms-sne_names.json")
    clusters_ids = [[41, 50, 51], [42, 46, 47], [43, 54], [44, 53], [45, 48]]

    # Create clusters of users
    clusters = [[
        reader.get_user_by_id(clusters_ids[cluster_index][user_index])
        for user_index in range(0, len(clusters_ids[cluster_index]))
    ] for cluster_index in range(0, len(clusters_ids))]

    # Display clusters
    print("\nFinal clusters:")
    show_users_sets(clusters)

    # Display clusters metrics
    for user_set in clusters:
        metric = TeamMetric(set(user_set))
Example #11
def experiment3(clustering_tool_type, clusters_number, input_data_file_name,
                lists_count):
    result_clusters = []
    is_all_clustered = False
    reader = DataReader(input_data_file_name)
    clustered_users = []
    users_count = len(cu.get_not_clustered_users_set(reader, clustered_users))
    max_cluster_size = int(ceil(users_count / clusters_number))

    while not is_all_clustered:

        # Get clusterings by lists
        clustering_tool = cu.ClusteringTools.build_clustering_tool(
            clusters_number, max_cluster_size, clustering_tool_type)
        clusterings = cu.cluster_users(clustering_tool, reader,
                                       clustered_users, clusters_number,
                                       lists_count)

        # Displaying info about the clustering (temporary)
        print("\nClustering by list %s" % 1)
        show_users_sets(clusterings.get_clustering_by_list_number(0))
        print("Clustering by list %s" % 2)
        show_users_sets(clusterings.get_clustering_by_list_number(1))

        # Find the maximum common part of the clusters of the different lists
        new_cluster = clusterings.get_max_common_part_of_clusterings()
        print("Common part: " + str([user.get_id() for user in new_cluster]))

        # Trim the cluster while it exceeds the maximum allowed size
        while len(new_cluster) > max_cluster_size:
            new_cluster = cu.kick_user_from_cluster(new_cluster, lists_count)

        # Remember users which have been clustered
        clustered_users.extend(new_cluster)

        # Save cluster and reduce required clusters number
        result_clusters.append(new_cluster)
        clusters_number -= 1

        # Check the terminal condition
        is_all_clustered = len(result_clusters) >= CLUSTERS_COUNT

    # Display clusters before balancing
    print("\nClusters before balancing:")
    show_users_sets(result_clusters)

    # Display clusters metrics
    for user_set in result_clusters:
        if len(user_set) != 0:
            metric = TeamMetric(set(user_set))
            print(metric.get_final_metric_value())

    # If any cluster exceeds the maximum size, rebalance it
    result_clusters = cu.balance_after_clustering(
        result_clusters,
        cu.get_not_clustered_users_set(reader, clustered_users), lists_count,
        max_cluster_size)

    # Display final clusters
    print("\nFinal clusters:")
    show_users_sets(result_clusters)

    # Display final clusters metrics
    final_metric_value = 0
    for user_set in result_clusters:
        metric = TeamMetric(set(user_set))
        final_metric_value += metric.get_final_metric_value()
        print(metric)

    return {"clusters": result_clusters, "metric": final_metric_value}
Example #12
from experiments.experiment4.preferences_clustering import PreferencesClustering
from utils.data_reader import DataReader
from utils.json_serializer import Serializer
from utils.metrics import TeamDesiresMetric, ClusteringMetric

__author__ = 'Xomak'

reader = DataReader("../data/ms-sne.json")
pc = PreferencesClustering(reader.get_all_users(), 6)
result = pc.clusterize()
for current_set in result:
    output = []
    for user in current_set:
        output.append(str(user))
    print(','.join(output))
    print(TeamDesiresMetric(current_set))
print(ClusteringMetric(result).get_average_desires_metric())
Serializer.serialize_to_file(result, "../web-visualiser/data.json")

Example #13
import utils.clustering_utils as cu
from experiments.experiment3.values_clustering import ValuesClustering
from utils.data_reader import DataReader
from utils.json_serializer import Serializer
from utils.metrics import ClusteringMetric, MetricTypes

REPEATS_COUNT = 10
CLUSTERS_COUNT = 3
LISTS_COUNT = 2
INPUT_DATA_FILENAME = "../data/users.json"
CLUSTERING_TOOL_TYPE = cu.ClusteringTools.KMEANS
METRIC_TYPE = MetricTypes.LISTS

if __name__ == '__main__':

    reader = DataReader(INPUT_DATA_FILENAME)
    clustering = ValuesClustering(CLUSTERING_TOOL_TYPE, reader)

    result_clusters = clustering.cluster(CLUSTERS_COUNT, LISTS_COUNT,
                                         REPEATS_COUNT)

    result_metric = ClusteringMetric(
        list(map(lambda cluster: set(cluster), result_clusters)), METRIC_TYPE)

    print("\n%s" % str(result_metric))
    Serializer.serialize_to_file(result_clusters,
                                 "../web-visualiser/data.json")
Example #14
def clusterize(filename, teams_number):
    reader = DataReader(filename)
    clustering_alg = UsersAgglomerativeClustering(reader, teams_number)
    cl = clustering_alg.clusterize()
    Serializer.serialize_to_file(cl, "../web-visualiser/data.json")
Example #15
IMAGE_DIR = "/home/milton/dataset/segmentation/Materials_In_Vessels/Train_Images/"
LABEL_DIR = "/home/milton/dataset/segmentation/Materials_In_Vessels/LiquidSolidLabels/"
PRE_TRAIN_MODEL_PATH = "/home/milton/dataset/trained_models/vgg16.npy"

NUM_CLASSES = 4
EPOCHS = 5
BATCH_SIZE = 5
GPU_NUM = 2
LEARNING_RATE = 1e-5
LOGS_DIR = "/home/milton/research/code-power-logs/fcnvgg16/"
TOWER_NAME = 'tower'
log_device_placement = True

# ..................... Create Data Reader ......................................#
data_reader = DataReader(image_dir=IMAGE_DIR,
                         label_dir=LABEL_DIR,
                         batch_size=BATCH_SIZE)
data_reader.loadDataSet()
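# One optimization step per global batch: epochs * training samples / (batch size * GPU count)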
ITERATIONS = EPOCHS * data_reader.total_train_count / (BATCH_SIZE * GPU_NUM)

print("Total Iterations {}".format(ITERATIONS))


def tower_loss(scope, images, labels, net, keep_prob):
    """Calculate the total loss on a single tower running the CIFAR model.
  Args:
    scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
    images: Images. 4D tensor of shape [batch_size, height, width, 3].
    labels: Labels. 1D tensor of shape [batch_size].
  Returns:
     Tensor of shape [] containing the total loss for a batch of data
  """
Example #16
                    f.write(out_string)


desires_weight = 1
need_balance = True
metric_type = MetricTypes.DESIRES
data_files_names = [
    "../data/ms-sne_names.json", "../data/eltech-vector.json",
    "../data/users.json"
]

results = ResultHolder(data_files_names)

for data_file_name in data_files_names:

    reader = DataReader(data_file_name)
    max_teams = int(len(reader.get_all_users()) / 2)
    for teams in range(2, max_teams + 1):

        print("\n\nTEAMS: %d\n" % teams)

        # Spectral
        clustering_alg = UsersSpectralClustering(reader,
                                                 teams,
                                                 desires_weight=desires_weight,
                                                 need_balance=need_balance)
        sets = clustering_alg.cluster()
        results.add_metric_for(
            data_file_name, teams,
            ClusteringMetric(sets, metric_type).get_final_metric())