Example #1
def plot_budget():
    data_frame = get_data_frames_combined()

    print(data_frame.head(20))

    places = data_frame[1].unique()
    categories = data_frame[3].unique()

    color_generator = RandomColor()

    # print('# PLACES')
    # print(places)
    # print('# CATEGORIES')
    # print(categories)

    plot.subplot(121)

    for category in categories:
        category_filter = data_frame[3] == category
        fdf = data_frame[category_filter]

        plot.scatter(fdf[0],
                     fdf[2],
                     color=color_generator.generate()[0],
                     marker='.',
                     label=category)

    plot.xlabel('Time in ms since 1.1.1970 (UNIX timestamp)')
    plot.ylabel('Price (EUR)')
    plot.legend(bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
    plot.show()
Example #2
def update_random_colors(results):
	
	'''
		results should be a list of dicts,
		e.g. [{"item": 1}, ...]
	'''
	color = RandomColor()

	for res in results:
		res.update({"color": color.generate()[0]})

	return results
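
A minimal usage sketch for update_random_colors (purely illustrative; it assumes the function above is in scope, the randomcolor package is installed, and the input dicts are made up):

from randomcolor import RandomColor  # required by update_random_colors itself

results = [{"item": 1}, {"item": 2}, {"item": 3}]
colored = update_random_colors(results)

# Each dict now also carries a "color" key holding a hex string such as '#b98bd3'.
for entry in colored:
    print(entry["item"], entry["color"])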
Example #3
    def plot_data(data):
        num_sample = 5000
        label = data[:, -1]
        feature = data[:, :-1]

        assignment = {}

        for i in range(len(feature)):
            if label[i] not in assignment:
                assignment[label[i]] = []

            assignment[label[i]].append(i)

        # down sample
        old_assignment = assignment
        assignment = {}

        indices = []
        for label in old_assignment:
            last_length = len(indices)
            indices += np.random.choice(
                old_assignment[label],
                size=min(int(num_sample / len(old_assignment)),
                         len(old_assignment[label])),
                replace=False).tolist()
            assignment[label] = np.arange(last_length, len(indices))

        feature = feature[indices]
        print(feature.shape)
        print(len(indices))
        print(len(np.unique(indices)))

        tsne = TSNE()
        x = tsne.fit_transform(feature)

        fig, ax = plt.subplots()

        # ax.plot(x[:, 0], x[:, 1], '*')
        r = RandomColor()
        colors = r.generate(count=len(assignment))
        for i, label in enumerate(assignment):
            ax.plot(x[assignment[label]][:, 0],
                    x[assignment[label]][:, 1],
                    '*',
                    color=colors[i],
                    label=label)
        plt.legend()
        plt.show()
Example #4
    def Domains(self):

        seq_records = SeqIO.parse(os.path.join('Input', 'ProteinInput'),
                                  'fasta')

        input_protein_list = []
        for record in seq_records:
            input_protein_list.append(record.id)

        Complete_Domains = []
        domainNameList = []
        for item in input_protein_list:
            sleep(self.timer)
            e_fetch = Entrez.efetch(db='protein',
                                    id="%s" % item,
                                    retmax=1000,
                                    rettype='gb',
                                    retmode='fasta')

            for seq_record in SeqIO.parse(e_fetch, 'gb'):
                domain_list = []
                accession_number = seq_record.id

                for i in range(len(seq_record.features)):
                    if seq_record.features[i].type == 'Region':
                        domain_location = str(
                            seq_record.features[i].location).split(
                                '[')[1].split(']')[0]
                        domain_name = str(
                            seq_record.features[i].qualifiers['region_name']
                            [0])
                        domainNameList.append(domain_name)

                        domain_list.append([domain_name, domain_location])

                Complete_Domains.append(dict([(accession_number, domain_list)
                                              ]))

        rand_color = RandomColor()
        domains_dict_colors = {
            domain: rand_color.generate()[0]
            for domain in set(domainNameList)
        }
        Domains = list(set(domainNameList))

        return Complete_Domains, domains_dict_colors, Domains
Example #5
def colored_pb_example(example):
	arg_text_examples = [arg['arg_text'] for arg in example['args']]
	colors = RandomColor(seed = example['example_name']).generate(count=len(example['args']), luminosity='light')
	color_dict = {arg_text:color for arg_text,color in zip(arg_text_examples, colors)}
	colored_arg_dict = {arg_text:'<span style=background-color:'+color_dict[arg_text]+'>'+arg_text+'</span>' for arg_text in arg_text_examples }
	for arg in example['args']:
		arg['arg_text'] = colored_arg_dict[arg['arg_text']]
	
	for arg_text in arg_text_examples:
		example['example_text'] = example['example_text'].replace(arg_text, colored_arg_dict[arg_text])
		
	return example
Example #6
def formatted_def(frame, markup):
    frame_elements = frame['elements']
    colors = RandomColor(seed=frame['name']).generate(
        count=len(frame_elements), luminosity='light')
    color_dict = {f['fe_name']: c for f, c in zip(frame_elements, colors)}
    color_dict_abbrev = {
        f['abbrev']: c
        for f, c in zip(frame_elements, colors)
    }

    pattern_fen = re.compile('<fen[^>]*>[^<]+</fen>')
    for tagset in set(re.findall(pattern_fen, markup)):
        fen = tagset[5:-6]
        try:
            fen_color = color_dict[fen]
            markup = markup.replace(
                '<fen>' + fen,
                '<fen style="background-color:' + fen_color + ';">' + fen)
        except KeyError:
            try:
                fen_color = color_dict_abbrev[fen]
                markup = markup.replace(
                    '<fen>' + fen,
                    '<fen style="background-color:' + fen_color + ';">' + fen)
            except KeyError:
                break

    pattern_t = re.compile('<t[^>]*>[^<]+</t>')
    for tagset in set(re.findall(pattern_t, markup)):
        markup = markup.replace(
            '<t>', '<t style="font-weight:bold; text-transform:uppercase;">')

    pattern_ex = re.compile('<fex[^>]*>[^<]+</fex>')
    ex_tags = re.findall(pattern_ex, markup)
    name_pattern = re.compile('<fex (name=".+")>.*</fex>')
    for ex_tag in ex_tags:
        fex_name_block = re.findall(name_pattern, ex_tag)[0]
        fex_name = fex_name_block.split('=')[1].strip('"')
        try:
            fex_color = color_dict[fex_name]
            markup = markup.replace(
                fex_name_block, 'style="background-color:' + fex_color + ';"')
        except KeyError:
            try:
                fex_color = color_dict_abbrev[fex_name]
                markup = markup.replace(
                    fex_name_block,
                    'style="background-color:' + fex_color + ';"')
            except KeyError:
                break

    return markup
Example #7
def getRandomColour() -> int:
    """
    Generates a random colour as a hexadecimal integer.

    Notes
    -------
    The :mod:`randomcolor` library is used to generate a random colour in hex
    format. The '#' character is stripped from the string and then the string
    is converted to a hexadecimal integer.

    Returns
    -------
    Int
        A random colour represented as a hexadecimal integer.
    """
    return int(RandomColor().generate()[0].lstrip('#'), 16)
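
A quick, illustrative check of the hex round trip described in the docstring above (assumes getRandomColour and the randomcolor package are available):

colour = getRandomColour()
print(hex(colour))        # e.g. 0x7fd1b9
print(f'#{colour:06x}')   # formatted back to a '#rrggbb' string for display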
Example #8
def create_colors(genomes_dict, og_id):
    '''
    Assign a random colour to every orthologous group (OG) found in
    genomes_dict. The target og_id is forced to red and genes with no OG to white.
    '''
    OGs = list()
    for genome, genes in genomes_dict.items():
        for gene in genes:
            OGs.append(gene[-1])
    print(OGs)

    OGs = list(set(OGs))

    colors = {OG: RandomColor().generate()[0] for OG in OGs}

    # set colors for non_OG genes and target OG_ID
    colors[og_id] = "red"
    if "" in colors:
        colors[""] = "white"

    return colors
Example #9
def formatted_def(frame, markup, popover=False):
    frame_elements = frame['elements']
    colors = RandomColor(seed=frame['name']).generate(
        count=len(frame_elements), luminosity='light')
    color_dict = {f['fe_name']: c for f, c in zip(frame_elements, colors)}
    color_dict_abbrev = {
        f['abbrev']: c
        for f, c in zip(frame_elements, colors)
    }

    if popover:
        element_dict = {
            element['fe_name']: formatted_def(frame, element['def_markup'])
            for element in frame['elements']
        }

    else:
        element_dict = None

    pattern_fen = re.compile('<fen[^>]*>[^<]+</fen>')
    for tagset in set(re.findall(pattern_fen, markup)):
        fen = tagset[5:-6]
        try:
            fen_color = color_dict[fen]
            if element_dict:
                fen_def = element_dict.get(fen, 'No Entry Found')
                popover_id = 'fen_popover_' + fen
                popover_div_content = '<div id=' + popover_id + ' style="display:none;"><div class="popover-body">' + fen_def + '</div></div>'
                markup = markup.replace(
                    '<fen>' + fen, popover_div_content +
                    '<fen data-trigger="hover focus" data-toggle="popover" data-popover-content="#'
                    + popover_id + '" style="background-color:' + fen_color +
                    ';">' + fen)
            else:
                markup = markup.replace(
                    '<fen>' + fen,
                    '<fen style="background-color:' + fen_color + ';">' + fen)
        except KeyError:
            try:
                fen_color = color_dict_abbrev[fen]
                if element_dict:
                    fen_def = element_dict.get(fen, 'No Entry Found')
                    popover_id = 'fen_popover_' + fen
                    popover_div_content = '<div id=' + popover_id + ' style="display:none;"><div class="popover-body">' + fen_def + '</div></div>'
                    markup = markup.replace(
                        '<fen>' + fen, popover_div_content +
                        '<fen data-trigger="hover focus" data-toggle="popover" data-popover-content="#'
                        + popover_id + '" style="background-color:' +
                        fen_color + ';">' + fen)
                else:
                    markup = markup.replace(
                        '<fen>' + fen, '<fen style="background-color:' +
                        fen_color + ';">' + fen)
            except KeyError:
                break

    pattern_t = re.compile('<t[^>]*>[^<]+</t>')
    for tagset in set(re.findall(pattern_t, markup)):
        markup = markup.replace(
            '<t>', '<t style="font-weight:bold; text-transform:uppercase;">')

    pattern_ex = re.compile('<fex[^>]*>[^<]+</fex>')
    ex_tags = re.findall(pattern_ex, markup)
    name_pattern = re.compile('<fex (name=".+")>.*</fex>')
    for ex_tag in ex_tags:
        try:
            fex_name_block = re.findall(name_pattern, ex_tag)[0]
            fex_name = fex_name_block.split('=')[1].strip('"')
            try:
                fex_color = color_dict[fex_name]
                markup = markup.replace(
                    fex_name_block,
                    'style="background-color:' + fex_color + ';"')
            except KeyError:
                try:
                    fex_color = color_dict_abbrev[fex_name]
                    markup = markup.replace(
                        fex_name_block,
                        'style="background-color:' + fex_color + ';"')
                except KeyError:
                    break
        except IndexError:
            pass

    return markup
Example #10
def get_palette(df):
    """Create color pallette so that static map and animation routes match"""
    each_route = df['route_id'].unique()
    pal = {r: RandomColor().generate()[0] for r in each_route}
    return pal
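
A brief usage sketch for get_palette, assuming a pandas DataFrame with a 'route_id' column; the route IDs below are invented:

import pandas as pd
from randomcolor import RandomColor

df = pd.DataFrame({'route_id': ['10', '10', '22', '47']})
pal = get_palette(df)
# pal maps each distinct route_id to its own hex colour, e.g. {'10': '#c6e2a9', ...}
print(pal)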
Example #11
ap.add_argument('-l',
                '--luminosity',
                type=str,
                default=None,
                help='''
Controls the luminosity of the generated color. You can specify a
string containing bright, light, or dark.''')

# Seed
# FIXME: doesn't work
#ap.add_argument('-s','--seed', type = int, default=None, help='''
#An integer which when passed will cause randomColor to return
#the same color each time.''')

# Format
ap.add_argument('-f',
                '--format',
                type=str,
                default=None,
                help='''
A string which specifies the format of the generated color. Possible
values are rgb, rgbArray, hsl, hslArray and hex (default).''')

# Parse input arguments.
args = vars(ap.parse_args())
args = rmNone(args)

# Generate n random colors.
rand_color = RandomColor()
print(rand_color.generate(**args))
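
For reference, a rough Python equivalent of running the script above with '--luminosity light', assuming rmNone (defined earlier in the script, not shown) simply drops the options left at None:

from randomcolor import RandomColor

rand_color = RandomColor()
# Only the explicitly supplied option reaches generate(); everything else uses defaults.
print(rand_color.generate(luminosity='light'))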
Example #12
import re
import cv2
import time
import threading
import numpy as np
from randomcolor import RandomColor
from skimage.measure import find_contours

randomcolor = RandomColor()
color_regex = re.compile(r'(\d+)')


def random_colors(count):
    # Ask randomcolor for `count` colours as 'rgb(r, g, b)' strings and parse
    # the three integers out of each one with the regex above.
    return list(
        map(lambda x: list(map(int, color_regex.findall(x))),
            randomcolor.generate(count=count, format_='rgb')))


def apply_mask(image, mask, color=None, alpha=0.5):
    # Blend `color` into `image` wherever `mask` is truthy, channel by channel,
    # weighting the overlay by `alpha`. Picks a random colour if none is given.
    if not color:
        color = random_colors(1)[0]
    for i in range(3):
        image[:, :,
              i] = np.where(mask,
                            image[:, :, i] * (1 - alpha) + alpha * color[i],
                            image[:, :, i])
    return image


def mask_image(image,
               boxes,
Example #13
def main(variant):
    HYPERBAND_MAX_EPOCHS = 50
    EXECUTION_PER_TRIAL = 2
    SEED = 13377331
    # ,'../data/FunLines/task-1/preproc/2_concat_train.bin'
    train_path, dev_path, test_path = [
        '../data/task-1/preproc/2_concat_train.bin'
    ], ['../data/task-1/preproc/2_concat_dev.bin'
        ], ['../data/task-1/preproc/2_concat_test.bin']
    if variant == 'HUMOR':
        params = json.load(open("./lib/models/tuning/model.json", 'r'))

        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        test_data = load_data(test_path)

        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature,
            NellKbFeature
        ]

        train_data.AddFeatures(features)
        dev_data.AddFeatures(features)
        test_data.AddFeatures(features)

        features, train_y = train_data.GetFeatureVectors(
        ), train_data.GetGrades()
        ins = {"FeatureInput": features[:, :4]}
        i = 4
        ins["EntityInput"] = features[:, i:]

        # text = np.load('../data/task-1/train_replaced.npy', allow_pickle=True) #
        text = train_data.GetReplaced()
        ins["ReplacedInput"] = text

        # text = np.load('../data/task-1/train_edit.npy', allow_pickle=True) #
        text = train_data.GetEdits()
        ins["ReplacementInput"] = text

        # Dev data
        dev_features, dev_y = dev_data.GetFeatureVectors(), dev_data.GetGrades(
        )
        devIns = {"FeatureInput": dev_features[:, :4]}
        i = 4
        devIns["EntityInput"] = dev_features[:, i:]

        # text = np.load('../data/task-1/dev_replaced.npy', allow_pickle=True) #
        text = dev_data.GetReplaced()
        devIns["ReplacedInput"] = text

        # text = np.load('../data/task-1/dev_edit.npy', allow_pickle=True) #
        text = dev_data.GetEdits()
        devIns["ReplacementInput"] = text

        # Test data
        test_features, test_y = test_data.GetFeatureVectors(
        ), test_data.GetGrades()
        testIns = {"FeatureInput": test_features[:, :4]}
        i = 4
        testIns["EntityInput"] = test_features[:, i:]

        # text = np.load('../data/task-1/test_replaced.npy', allow_pickle=True) #
        text = test_data.GetReplaced()
        testIns["ReplacedInput"] = text

        # text = np.load('../data/task-1/test_edit.npy', allow_pickle=True) #
        text = test_data.GetEdits()
        testIns["ReplacementInput"] = text

        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)

        score = []
        for i in range(10):
            model = create_HUMOR2_model(4, 25, 128, params["hyperparameters"])
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=40,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(
                mean_squared_error(test_y, round_numbers(preds),
                                   squared=False))
            print(score[i])
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')

    elif variant == 'HUMOR2':
        params = json.load(open("./lib/models/tuning/model2.json", 'r'))

        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        test_data = load_data(test_path)

        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature,
            NellKbFeature, AlbertTokenizer
        ]

        train_data.AddFeatures(features)
        dev_data.AddFeatures(features)
        test_data.AddFeatures(features)

        features, train_y = train_data.GetFeatureVectors(
        ), train_data.GetGrades()
        ins = {"FeatureInput": features[:, :4]}
        i = 4
        ins["EntityInput"] = features[:, i:i + 25]
        i += 25
        ins["input_word_ids"] = features[:, i:i + 128]
        i += 128
        ins["segment_ids"] = features[:, i:i + 128]
        i += 128
        ins["input_mask"] = features[:, i:i + 128]

        text = train_data.GetReplaced()
        ins["ReplacedInput"] = text

        text = train_data.GetEdits()
        ins["ReplacementInput"] = text

        # Dev data
        dev_features, dev_y = dev_data.GetFeatureVectors(), dev_data.GetGrades(
        )
        devIns = {"FeatureInput": dev_features[:, :4]}
        i = 4
        devIns["EntityInput"] = dev_features[:, i:i + 25]
        i += 25
        devIns["input_word_ids"] = dev_features[:, i:i + 128]
        i += 128
        devIns["segment_ids"] = dev_features[:, i:i + 128]
        i += 128
        devIns["input_mask"] = dev_features[:, i:i + 128]

        text = dev_data.GetReplaced()
        devIns["ReplacedInput"] = text

        text = dev_data.GetEdits()
        devIns["ReplacementInput"] = text

        # Test data
        test_features, test_y = test_data.GetFeatureVectors(
        ), test_data.GetGrades()
        testIns = {"FeatureInput": test_features[:, :4]}
        i = 4
        testIns["EntityInput"] = test_features[:, i:i + 25]
        i += 25
        testIns["input_word_ids"] = test_features[:, i:i + 128]
        i += 128
        testIns["segment_ids"] = test_features[:, i:i + 128]
        i += 128
        testIns["input_mask"] = test_features[:, i:i + 128]

        text = test_data.GetReplaced()
        testIns["ReplacedInput"] = text

        text = test_data.GetEdits()
        testIns["ReplacementInput"] = text

        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)

        score = []
        for i in range(10):
            model = create_HUMOR2_model(4, 25, 128, params["hyperparameters"])
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=25,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(mean_squared_error(test_y, preds, squared=False))
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')
    elif variant == 'TASK2INFER':
        model = './headline_regression/20200308-194029-BEST/weights/final.hdf5'
        infer = Task2Inference(model,
                               '../data/task-2/preproc/2_concat_test.bin')
        infer.predict('../data/task-2/predictions/task-2-output.csv')
    elif variant == 'TESTINFER':
        preds = 'task-1-output.context.csv'
        test = load_data(test_path)
        y = test.GetGrades()
        with open(preds, 'r') as f:
            i = 0
            pred_list = []
            for line in f:
                if i == 0:
                    i = 1
                else:
                    pred_list.append(float(line.strip().split(',')[1]))
        rmse = mean_squared_error(y, np.array(pred_list), squared=False)
        print(rmse)

    elif variant == 'NAM':
        model = create_NAM_model(1, 181544, 832)
        data_path = '../data/NELL/NELLRDF.xml'
        ent_vocab = '../data/NELL/NELLWordNetVocab.txt'
        rel_vocab = '../data/NELL/NELLRelVocab.txt'
        trainer = NAMTraining(model, data_path, ent_vocab, rel_vocab)
        trainer.train(30, 2048)
        trainer.test()
    elif variant == 'TUNING':
        model = HumorTuner(4, 20)
        tuner = Hyperband(model,
                          max_epochs=HYPERBAND_MAX_EPOCHS,
                          objective=kerastuner.Objective(
                              "val_root_mean_squared_error", direction="min"),
                          seed=SEED,
                          executions_per_trial=EXECUTION_PER_TRIAL,
                          hyperband_iterations=2,
                          directory='tuning_hyperband',
                          project_name='ContextHumor')

        tuner.search_space_summary()

        ## Loading the data
        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature,
            NellKbFeature
        ]
        train_data.AddFeatures(features)
        dev_data.AddFeatures(features)

        features, train_y = train_data.GetFeatureVectors(
        ), train_data.GetGrades()
        ins = {"FeatureInput": features[:, :4]}
        i = 4
        ins["EntityInput"] = features[:, i:i + 20]

        ins["ReplacedInput"] = train_data.GetReplaced()
        ins["ReplacementInput"] = train_data.GetEdits()

        # Dev data
        dev_features, dev_y = dev_data.GetFeatureVectors(), dev_data.GetGrades(
        )
        devIns = {"FeatureInput": dev_features[:, :4]}
        i = 4
        devIns["EntityInput"] = dev_features[:, i:i + 20]

        devIns["ReplacedInput"] = dev_data.GetReplaced()
        devIns["ReplacementInput"] = dev_data.GetEdits()

        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0005,
            patience=2,
            mode='min',
            restore_best_weights=True)

        tuner.oracle.hyperband_iterations = 2

        tuner.search(ins,
                     train_y,
                     epochs=HYPERBAND_MAX_EPOCHS,
                     batch_size=64,
                     validation_data=(devIns, dev_y),
                     callbacks=[early])

        tuner.results_summary()
    elif variant == 'TUNINGSERVER':
        params = json.load(open("./lib/models/tuning/model.json", 'r'))
        model = HumorTunerServer(4, 20, 128, params["hyperparameters"])
        tuner = Hyperband(model,
                          max_epochs=HYPERBAND_MAX_EPOCHS,
                          objective=kerastuner.Objective(
                              "val_root_mean_squared_error", direction="min"),
                          seed=SEED,
                          executions_per_trial=EXECUTION_PER_TRIAL,
                          hyperband_iterations=1,
                          directory='tuning_hyperband',
                          project_name='ContextHumor')

        tuner.search_space_summary()

        ## Loading the data
        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature,
            NellKbFeature, AlbertTokenizer
        ]
        train_data.AddFeatures(features)
        dev_data.AddFeatures(features)

        features, train_y = train_data.GetFeatureVectors(
        ), train_data.GetGrades()
        ins = {"FeatureInput": features[:, :4]}
        i = 4
        ins["EntityInput"] = features[:, i:i + 20]
        i += 20
        ins["input_word_ids"] = features[:, i:i + 128]
        i += 128
        ins["segment_ids"] = features[:, i:i + 128]
        i += 128
        ins["input_mask"] = features[:, i:i + 128]

        text = train_data.GetReplaced()
        ins["ReplacedInput"] = text

        text = train_data.GetEdits()
        ins["ReplacementInput"] = text

        # Dev data
        dev_features, dev_y = dev_data.GetFeatureVectors(), dev_data.GetGrades(
        )
        devIns = {"FeatureInput": dev_features[:, :4]}
        i = 4
        devIns["EntityInput"] = dev_features[:, i:i + 20]
        i += 20
        devIns["input_word_ids"] = dev_features[:, i:i + 128]
        i += 128
        devIns["segment_ids"] = dev_features[:, i:i + 128]
        i += 128
        devIns["input_mask"] = dev_features[:, i:i + 128]

        text = dev_data.GetReplaced()
        devIns["ReplacedInput"] = text

        text = dev_data.GetEdits()
        devIns["ReplacementInput"] = text

        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0005,
            patience=2,
            mode='min',
            restore_best_weights=True)

        tuner.search(ins,
                     train_y,
                     epochs=HYPERBAND_MAX_EPOCHS,
                     batch_size=64,
                     validation_data=(devIns, dev_y),
                     callbacks=[early])

        tuner.results_summary()
    elif variant == 'PLOT':
        axes = [
            "feature_units1", "feature_units2", "entity_units1",
            "entity_units2", "sentence_units1", "sentence_units2",
            "sentence_units3"
        ]
        models = json.load(open("./lib/models/tuning/result_summary.json",
                                'r'))
        params = defaultdict(list)

        for model in models["top_10"]:
            t_id = model["TrialID"]
            model_param = json.load(
                open(f"./tuning_hyperband/HumorHumor/trial_{t_id}/trial.json",
                     "r"))
            for a in axes:
                params[a].append(model_param["hyperparameters"]["values"][a])
            params["score"].append(model["Score"])

        fig = go.Figure(
            data=go.Parcoords(line_color='green',
                              dimensions=list([
                                  dict(range=[8, 128],
                                       label='Feature Layer 1',
                                       values=params[axes[0]]),
                                  dict(range=[8, 128],
                                       label='Feature Layer 2',
                                       values=params[axes[1]]),
                                  dict(range=[8, 128],
                                       label='Knowledge Layer 1',
                                       values=params[axes[2]]),
                                  dict(range=[8, 128],
                                       label='Knowledge Layer 2',
                                       values=params[axes[3]]),
                                  dict(range=[32, 512],
                                       label='Word Layer 1',
                                       values=params[axes[4]]),
                                  dict(range=[32, 512],
                                       label='Word Layer 2',
                                       values=params[axes[5]]),
                                  dict(range=[8, 128],
                                       label='Word Layer 3',
                                       values=params[axes[6]]),
                                  dict(range=[0, 1],
                                       label='Root Mean Square Error',
                                       values=params["score"]),
                              ])))

        fig.show()

    elif variant == 'MultiCNN':
        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        test_data = load_data(test_path)
        with codecs.open('../data/vocab/train_vocab.json',
                         encoding='utf-8') as fp:
            vocab_dict = json.load(fp)

        max_length = longest(train_data.GetTokenizedWEdit())
        ins = {
            "TextIn":
            convert_to_index(vocab_dict, train_data.GetTokenizedWEdit(),
                             max_length)
        }
        train_y = train_data.GetGrades()
        devIns = {
            "TextIn":
            convert_to_index(vocab_dict, dev_data.GetTokenizedWEdit(),
                             max_length)
        }
        dev_y = dev_data.GetGrades()
        testIns = {
            "TextIn":
            convert_to_index(vocab_dict, test_data.GetTokenizedWEdit(),
                             max_length)
        }
        test_y = test_data.GetGrades()
        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)
        lr_schedule = create_learning_rate_scheduler(max_learn_rate=1e-1,
                                                     end_learn_rate=1e-6,
                                                     warmup_epoch_count=15,
                                                     total_epoch_count=40)
        score = []
        for i in range(10):
            model = create_MultiCNN_model()
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=40,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(mean_squared_error(test_y, preds, squared=False))
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')

        # preds = model.predict(devIns)
        # ids = dev_data.GetIDs()
        # out = np.stack((ids, preds.flatten()), axis=-1)
        # Save the predictions to file
        # np.savetxt(f'../plots/{variant}.csv', out, header='id,pred', fmt="%d,%1.8f")

        # print(f'Mean of preds: {preds.mean()}, STD of preds: {preds.std()}, Mean of true: {dev_y.mean()}, STD of true: {dev_y.std()}')
        # bins = np.linspace(0, 3, 50)
        # plt.hist(preds, bins=bins, alpha=0.5, label="preds")
        # plt.hist(dev_y, bins=bins, alpha=0.5, label="true")
        # plt.legend(loc='upper right')
        # plt.show()
        # del model

    elif variant == 'CNN':
        # model = create_CNN_model()
        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        test_data = load_data(test_path)
        with codecs.open('../data/vocab/train_vocab.json',
                         encoding='utf-8') as fp:
            vocab_dict = json.load(fp)

        max_length = longest(train_data.GetTokenizedWEdit())
        ins = {
            "TextIn":
            convert_to_index(vocab_dict, train_data.GetTokenizedWEdit(),
                             max_length)
        }
        train_y = train_data.GetGrades()
        devIns = {
            "TextIn":
            convert_to_index(vocab_dict, dev_data.GetTokenizedWEdit(),
                             max_length)
        }
        dev_y = dev_data.GetGrades()
        testIns = {
            "TextIn":
            convert_to_index(vocab_dict, test_data.GetTokenizedWEdit(),
                             max_length)
        }
        test_y = test_data.GetGrades()
        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)
        score = []
        for i in range(10):
            model = create_CNN_model()
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=40,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(mean_squared_error(test_y, preds, squared=False))
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')

        # preds = model.predict(devIns)
        # ids = dev_data.GetIDs()
        # out = np.stack((ids, preds.flatten()), axis=-1)
        # Save the predictions to file
        # np.savetxt(f'../plots/{variant}.csv', out, header='id,pred', fmt="%d,%1.8f")

        # print(f'Mean of preds: {preds.mean()}, STD of preds: {preds.std()}, Mean of true: {dev_y.mean()}, STD of true: {dev_y.std()}')
        # bins = np.linspace(0, 3, 50)
        # plt.hist(preds, bins=bins, alpha=0.5, label="preds")
        # plt.hist(dev_y, bins=bins, alpha=0.5, label="true")
        # plt.legend(loc='upper right')
        # plt.show()
        # del model

    elif variant == 'KBLSTM':
        train_data = load_data(train_path)
        train_data.AddFeatures([NellKbFeature])
        dev_data = load_data(dev_path)
        dev_data.AddFeatures([NellKbFeature])
        test_data = load_data(test_path)
        test_data.AddFeatures([NellKbFeature])
        with codecs.open('../data/vocab/train_vocab.json',
                         encoding='utf-8') as fp:
            vocab_dict = json.load(fp)

        max_length = longest(train_data.GetTokenizedWEdit())
        train = convert_to_index(vocab_dict, train_data.GetTokenizedWEdit(),
                                 max_length)
        ins = {"TextIn": train, "EntityInput": train_data.GetFeatureVectors()}
        train_y = train_data.GetGrades()
        dev = convert_to_index(vocab_dict, dev_data.GetTokenizedWEdit(),
                               max_length)
        devIns = {"TextIn": dev, "EntityInput": dev_data.GetFeatureVectors()}
        dev_y = dev_data.GetGrades()
        test = convert_to_index(vocab_dict, test_data.GetTokenizedWEdit(),
                                max_length)
        testIns = {
            "TextIn": test,
            "EntityInput": test_data.GetFeatureVectors()
        }
        test_y = test_data.GetGrades()
        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)
        score = []
        for i in range(10):
            model = create_KBLSTM_model()
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=40,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(mean_squared_error(test_y, preds, squared=False))
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')

        # preds = model.predict(devIns)
        # ids = dev_data.GetIDs()
        # out = np.stack((ids, preds.flatten()), axis=-1)
        # Save the predictions to file
        # np.savetxt(f'../plots/{variant}.csv', out, header='id,pred', fmt="%d,%1.8f")

        # print(f'Mean of preds: {preds.mean()}, STD of preds: {preds.std()}, Mean of true: {dev_y.mean()}, STD of true: {dev_y.std()}')
        # bins = np.linspace(0, 3, 50)
        # plt.hist(preds, bins=bins, alpha=0.5, label="preds")
        # plt.hist(dev_y, bins=bins, alpha=0.5, label="true")
        # plt.legend(loc='upper right')
        # plt.show()
        del model

    elif variant == 'NNLM':
        train_data = load_data(train_path)
        dev_data = load_data(dev_path)
        test_data = load_data(test_path)

        ins = {"sentence_in": train_data.GetEditSentences()}
        devIns = {"sentence_in": dev_data.GetEditSentences()}
        testIns = {"sentence_in": test_data.GetEditSentences()}
        train_y = train_data.GetGrades()
        dev_y = dev_data.GetGrades()
        test_y = test_data.GetGrades()
        early = tf.keras.callbacks.EarlyStopping(
            monitor='val_root_mean_squared_error',
            min_delta=0.0001,
            patience=5,
            mode='min',
            restore_best_weights=True)
        score = []
        for i in range(10):
            model = create_BERT_model()
            model.fit(x=ins,
                      y=train_y,
                      validation_data=(devIns, dev_y),
                      batch_size=16,
                      epochs=40,
                      shuffle=True,
                      callbacks=[early])

            preds = model.predict(testIns)
            score.append(mean_squared_error(test_y, preds, squared=False))
            del model
            # tf.reset_default_graph()
            tf.keras.backend.clear_session()
            gc.collect()
        score = np.array(score)
        print(f'{variant}: Mean: {score.mean()}, STD: {score.std()}')

        # preds = model.predict(devIns)
        # ids = dev_data.GetIDs()
        # out = np.stack((ids, preds.flatten()), axis=-1)
        # Save the predictions to file
        # np.savetxt(f'../plots/{variant}.csv', out, header='id,pred', fmt="%d,%1.8f")

        # print(f'Mean of preds: {preds.mean()}, STD of preds: {preds.std()}, Mean of true: {dev_y.mean()}, STD of true: {dev_y.std()}')
        # bins = np.linspace(0, 3, 50)
        # plt.hist(preds, bins=bins, alpha=0.5, label="preds")
        # plt.hist(dev_y, bins=bins, alpha=0.5, label="true")
        # plt.legend(loc='upper right')
        # plt.show()
        # del model

    elif variant == 'LINEAR':
        train = load_data(train_path)
        dev = load_data(dev_path)

        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature
        ]

        train.AddFeatures(features)
        dev.AddFeatures(features)

        X, y = train.GetFeatureVectors(), train.GetGrades()
        X_dev, dev_y = dev.GetFeatureVectors(), dev.GetGrades()

        reg = LinearRegression(n_jobs=-1).fit(X, y)

        preds = reg.predict(X_dev)
        rmse = mean_squared_error(dev_y, preds, squared=False)
        print(rmse)
        # ids = dev.GetIDs()
        # out = np.stack((ids, preds.flatten()), axis=-1)
        # Save the predictions to file
        # np.savetxt(f'../plots/{variant}.csv', out, header='id,pred', fmt="%d,%1.8f")

        # print(f'Mean of preds: {preds.mean()}, STD of preds: {preds.std()}, Mean of true: {dev_y.mean()}, STD of true: {dev_y.std()}')
    elif variant == 'VOCAB':
        embed_path = '../data/embeddings/numpy/headline.npy'
        print("Loading embeddings and vocab...")
        model = Word2Vec.load('../data/embeddings/headlineEmbeds.bin')
        print("Loaded embeddings...")
        with codecs.open('../data/vocab/train_vocab.funlines.json',
                         encoding='utf-8') as fp:
            vocab_dict = json.load(fp)
        print("Loaded vocab...")

        embed_matrix = np.zeros((len(vocab_dict), 300))
        i = 0
        for k, v in vocab_dict.items():
            try:
                embed_matrix[v] = model.wv.get_vector(k)
            except KeyError:
                # print(f'{k} does not exist in FastText embeddings')
                i += 1
        print(len(vocab_dict), i)
        print("Created the embedding matrix...")
        np.save(embed_path, embed_matrix)
        print("Saved the new embeddings...")
    elif variant == 'WORD2VEC':
        print("Loading data...")
        headline_paths = [
            '../data/extra_data_sarcasm/Sarcasm_Headlines_Dataset_v2.json'
        ]
        headlines = []
        for headline_path in headline_paths:
            with open(headline_path, 'r') as fp:
                for line in fp:
                    d = json.loads(line)
                    headlines.append(text_to_word_sequence(d["headline"]))

        train_data = load_data(train_path)
        print("Train model...")
        print(len(headlines))
        headlines.extend(train_data.GetTokenizedWEdit())
        print(len(headlines))
        model = Word2Vec(headlines,
                         size=300,
                         window=14,
                         workers=4,
                         min_count=1)

        vocab = list(model.wv.vocab)
        print(len(vocab))

        print("Saving model...")
        model.save('../data/embeddings/headlineEmbeds.bin')
    elif variant == 'PCA':
        model = PCA(n_components=3)
        entities = np.load('../data/NELL/embeddings/entity.npy')
        labels = load_vocab('../data/NELL/NELLWordNetVocab_proc.txt')
        top_100 = {}
        with open('../data/NELL/top_100_nell.txt', 'r') as f:
            for line in f:
                label = line.strip()
                top_100[label] = entities[labels[label]]

        # print(entities[:4])
        # print(labels[:4])

        pca_ent = model.fit_transform(list(top_100.values()))

        # create_dendrogram(list(top_100.values()), list(top_100.keys()), 'ward')

        # print(pca_ent.shape)
        # print(pca_ent[:10])
        rand_color = RandomColor()
        fig = go.Figure(data=[
            go.Scatter3d(x=pca_ent[:, 0],
                         y=pca_ent[:, 1],
                         z=pca_ent[:, 2],
                         mode='markers',
                         text=list(top_100.keys()),
                         marker=dict(size=12,
                                     color=rand_color.generate(count=100),
                                     colorscale='Viridis',
                                     opacity=0.8))
        ])

        plotly.offline.plot(fig, filename="NELLPCA.html")
    elif variant == 'MEAN':
        files = [
            'CNN.csv', 'context.csv', 'KBLSTM.csv', 'LINEAR.csv',
            'MultiCNN.csv', 'NNLM.csv', 'simple.csv'
        ]
        for f in files:
            with open(f'../plots/{f}', 'r') as fp:
                i = 0
                vals = []
                for line in fp:
                    if i == 0:
                        i += 1
                        continue
                    vals.append(float(line.strip().split(',')[1]))
            vals = np.array(vals)
            mean, std = vals.mean(), vals.std()
            print(f'{f.split(".")[0]}: Mean: {mean}, STD: {std}')
    elif variant == 'COEF':
        train = load_data(train_path)

        features = [
            PhoneticFeature, PositionFeature, DistanceFeature, SentLenFeature
        ]

        train.AddFeatures(features)

        X, y = train.GetFeatureVectors(), train.GetGrades()
        y = np.reshape(y, (-1, 1))
        print(y.shape)
        z = np.concatenate((X, y), axis=-1).T

        coef = np.corrcoef(z).round(decimals=4)

        np.savetxt("coef.csv", coef, delimiter=',')
    elif variant == 'ALBERT':
        model = create_BERT_model()
        train = load_data(train_path)
        dev = load_data(dev_path)
        test = load_data(test_path)

        features = [AlbertTokenizer]

        train.AddFeatures(features)
        dev.AddFeatures(features)
        test.AddFeatures(features)

        features, indexes = dev.GetFeatureVectors(), dev.GetIndexes()

        ins = {}
        i = 0
        ins["input_word_ids"] = features[:, i:i + 128]
        i += 128
        ins["segment_ids"] = features[:, i:i + 128]
        i += 128
        ins["input_mask"] = features[:, i:i + 128]

        preds = model.predict(ins)
        words_train = []
        for i, pred in enumerate(preds):
            words_train.append(pred[indexes[i]])
        words_train = np.array(words_train)
        print(words_train.shape)

        np.save("./dev_edit.npy", words_train)
    elif variant == 'MEDIAN':
        train_data = load_data(train_path)
        test_data = load_data(test_path)

        train_y = train_data.GetGrades()
        test_y = test_data.GetGrades()

        pred = np.median(train_y)
        print("Median", pred)

        pred_y = np.array([pred] * len(test_y))
        rmse = mean_squared_error(test_y, pred_y, squared=False)
        print("RMSE", rmse)