Example #1
0
def Exchange_rates(Base, Destination):
    """Print the cheapest and dearest market for a currency pair and plot them.

    Fetches the per-market ticker for ``Base``-``Destination`` from the
    cryptonator API, prints the lowest price, the highest price and their
    difference, then shows a scatter plot with one annotated point per market.

    Args:
        Base: code of the currency being priced (inserted into the URL).
        Destination: code of the currency the price is quoted in.

    Returns:
        None. Output goes to stdout and to a matplotlib window.
    """
    response = requests.get(
        'https://api.cryptonator.com/api/full/{}-{}'.format(Base, Destination))
    markets = response.json()['ticker']['markets']
    if not markets:
        # min()/max() below would raise on an empty sequence.
        print("No market data returned for {}-{}".format(Base, Destination))
        return

    market = [entry['market'] for entry in markets]
    price = [entry['price'] for entry in markets]

    # Compare numerically: comparing the raw values as text would order
    # "9.5" above "10.2".
    values = [float(p) for p in price]
    low = values.index(min(values))
    high = values.index(max(values))
    Difference = values[high] - values[low]
    print("Minimum Price is \t", price[low], " \tat ", market[low],
          " \nMaximum Price is \t", price[high], " \tat ", market[high],
          "\ndifference \t  is\t", Difference, Destination)

    # One x position per market.
    numbers = list(range(len(market)))

    # NOTE(review): the y axis carries market names while the label says
    # '#PRICE'; kept as in the original layout - confirm which was intended.
    plt.scatter(numbers, market, color='red')

    for i, txt in enumerate(price):
        plt.annotate(str(txt), (numbers[i], market[i]))

    plt.title('ARBITRAGE')
    plt.ylabel('#PRICE')
    plt.show()
Example #2
0
def tsne_plot(model):
    """Create a t-SNE model of the word vectors and plot it.

    Every word in ``model.wv.vocab`` is projected to two dimensions with
    t-SNE and drawn as a labelled point on a 16x16 inch figure.

    Args:
        model: object exposing ``wv.vocab`` (iterable of words) and
            ``wv[word]`` (vector lookup) - presumably a gensim Word2Vec
            model; confirm against the caller.
    """
    labels = list(model.wv.vocab)
    # Look vectors up through ``wv``, consistently with how the vocabulary
    # itself is read.
    tokens = [model.wv[word] for word in labels]

    tsne_model = TSNE(perplexity=40,
                      n_components=2,
                      init='pca',
                      n_iter=2500,
                      random_state=23)
    new_values = tsne_model.fit_transform(tokens)

    plt.figure(figsize=(16, 16))
    # One scatter call per point, as in the original: a single vectorised
    # call would draw every point in the same colour.
    for label, (x, y) in zip(labels, new_values):
        plt.scatter(x, y)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
    plt.show()
Example #3
0
def main():
    """Filter a sorted_models file by score cut-offs and plot the result.

    Reads a space-separated file with the columns ``model``, ``total_score``
    and ``interface_delta_X``, keeps the rows whose two scores are at or
    below the user-supplied maxima, and scatter-plots them: green where
    ``total_score + interface_delta_X`` is at or below the average sum, red
    where it is above. The model with the lowest sum is annotated.

    The file name comes from ``sys.argv[1]`` or, failing that, from a prompt.
    The two maxima are stored in the module globals ``maxtot`` and
    ``maxinter``.
    """
    # Before anything happens, number of command-line arguments is checked
    # and appropriate action taken.
    argumnumber()
    if len(sys.argv) < 2:
        models_path = raw_input("Please provide a sorted_models file: ")
    else:
        models_path = sys.argv[1]

    # Prompts user to provide maxtot and maxinter before program continues.
    global maxtot
    maxtot = float(raw_input("Please enter maximum total_score: "))
    global maxinter
    maxinter = float(raw_input("Please enter maximum interface_delta_X: "))

    # Imports DataFrame and filters based on max values provided.
    models_raw = pd.read_csv(
        models_path, sep=' ',
        names=['model', 'total_score', 'interface_delta_X'])
    keep = ((models_raw['total_score'] <= maxtot)
            & (models_raw['interface_delta_X'] <= maxinter))
    # .copy() so that adding the 'add' column below writes to an independent
    # frame rather than to a slice of models_raw.
    models = models_raw.loc[keep].copy()
    if models.empty:
        # The average and idxmin() below are undefined for an empty frame.
        sys.exit("No models satisfy the given maximum values.")

    sumavrg = (
        (np.sum(models['total_score']) + np.sum(models['interface_delta_X'])) /
        len(models))

    # Appends a column to models that contains the sum of total_score and
    # interface_delta_X.
    models['add'] = models['total_score'] + models['interface_delta_X']

    # Splits around the average and finds the row with the lowest sum.
    modhi = models.loc[models['add'] > sumavrg]
    modlo = models.loc[models['add'] <= sumavrg]
    # idxmin() returns an index *label*; after filtering, labels no longer
    # match positions, so the row must be fetched with .loc, not .iloc.
    best = models.loc[models['add'].idxmin()]
    xmin = best['total_score']
    ymin = best['interface_delta_X']

    # Creates the plot.
    plt.figure(figsize=[16, 9])
    plt.scatter(modlo['total_score'], modlo['interface_delta_X'],
                s=2, c='Green', marker='.')
    plt.scatter(modhi['total_score'], modhi['interface_delta_X'],
                s=2, c='Red', marker='.')
    plt.tick_params(axis='both',
                    direction='inout',
                    width=1,
                    length=6,
                    labelsize=13,
                    pad=4)
    plt.title('interface_delta_x vs total_score', size=16)
    plt.xlabel("total_score", fontsize=13)
    plt.ylabel("interface_delta_X", fontsize=13)
    plt.legend(['Sum <= average', 'Sum > average'], markerscale=7, fontsize=12)
    # The annotation text is passed positionally so the call does not depend
    # on the keyword's name, which differs between matplotlib versions.
    plt.annotate("Lowest sum: total_score: " + str(xmin) +
                 "; interface_delta_X: " + str(ymin),
                 xy=(xmin, ymin),
                 textcoords='axes fraction',
                 xytext=(0.6, 0.05))
    printtofile()
    plt.show()
Example #4
0
def cm_plot(original_label, predict_label, kunm, pic=None):
    """Plot a row-normalised five-class confusion matrix and save it as SVG.

    The matrix is converted to per-row percentages, drawn with ``matshow``,
    annotated cell by cell, and labelled with the stage names W, N1, N2, N3
    and REM together with the number of true (y axis) and predicted (x axis)
    samples of each stage.

    Args:
        original_label: true labels.
        predict_label: predicted labels.
        kunm: value embedded in the output file name.
        pic: unused; kept so existing callers keep working.

    Returns:
        Tuple ``(kappa(cm), classification_report(...))``.

    NOTE(review): ``savepath`` is not a parameter here, so it has to exist in
    an enclosing scope - confirm it is defined at module level.
    """
    stages = ("W", "N1", "N2", "N3", "REM")
    n = len(stages)

    cm = confusion_matrix(original_label, predict_label)
    row_totals = cm.sum(axis=1)   # true samples per class
    col_totals = cm.sum(axis=0)   # predicted samples per class

    # Per-row percentages. The float cast keeps this a true division even
    # where "/" on integers would truncate (Python 2).
    cm_new = np.round(
        cm[:n, :n].astype(float) / row_totals[:n, np.newaxis] * 100, 2)

    # matshow() opens its own figure; a preceding plt.figure() would leave an
    # empty figure behind that plt.close() below never closes.
    plt.matshow(cm_new, cmap=plt.cm.Blues)
    plt.colorbar()

    # Cell text: the rounded percentages rescaled so each row sums to 100.
    shown = cm_new * 100 / cm_new.sum(axis=1)[:, np.newaxis]
    for row in range(n):
        for col in range(n):
            plt.annotate(format(shown[row, col], ".2f"),
                         xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center',
                         fontsize=10)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('confusion matrix')

    y_stage = ["{}\n({})".format(name, row_totals[i])
               for i, name in enumerate(stages)]
    x_stage = ["{}\n({})".format(name, col_totals[i])
               for i, name in enumerate(stages)]
    ticks = list(range(n))
    plt.xticks(ticks, x_stage)
    plt.yticks(ticks, y_stage)

    plt.savefig(savepath + "matrix" + str(kunm) + ".svg")
    plt.show()
    plt.close()
    return kappa(cm), classification_report(original_label, predict_label)
Example #5
0
def cm_plot(original_label, predict_label, kunm, savepath):
    """Plot a row-normalised five-class confusion matrix and save it as SVG.

    The matrix is converted to per-row percentages, drawn with ``matshow``,
    annotated cell by cell, and labelled with the stage names W, N1, N2, N3
    and REM together with the number of true (y axis) and predicted (x axis)
    samples of each stage. The figure is written to
    ``savepath + "matrix" + str(kunm) + ".svg"``.

    Args:
        original_label: true labels.
        predict_label: predicted labels.
        kunm: value embedded in the output file name.
        savepath: string prefix of the output path (concatenated as-is).

    Returns:
        Tuple ``(kappa(cm), classification_report(...))``.
    """
    stage_names = ("W", "N1", "N2", "N3", "REM")
    size = len(stage_names)

    cm = confusion_matrix(original_label, predict_label)
    true_counts = cm.sum(axis=1)
    predicted_counts = cm.sum(axis=0)

    # Per-row percentages. The float cast keeps this a true division even
    # where "/" on integers would truncate (Python 2).
    cm_new = np.round(
        cm[:size, :size].astype(float) / true_counts[:size, np.newaxis] * 100,
        2)

    # matshow() opens its own figure; a preceding plt.figure() would leave an
    # empty figure behind that plt.close() below never closes.
    plt.matshow(cm_new, cmap=plt.cm.Blues)
    plt.colorbar()

    # Cell text: the rounded percentages rescaled so each row sums to 100.
    cell_values = cm_new * 100 / cm_new.sum(axis=1)[:, np.newaxis]
    for row in range(size):
        for col in range(size):
            plt.annotate(format(cell_values[row, col], ".2f"),
                         xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center',
                         fontsize=10)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('confusion matrix')

    y_stage = ["{}\n({})".format(name, true_counts[i])
               for i, name in enumerate(stage_names)]
    x_stage = ["{}\n({})".format(name, predicted_counts[i])
               for i, name in enumerate(stage_names)]
    positions = list(range(size))
    plt.xticks(positions, x_stage)
    plt.yticks(positions, y_stage)

    plt.savefig(savepath + "matrix" + str(kunm) + ".svg")
    plt.show()
    plt.close()
    return kappa(cm), classification_report(original_label, predict_label)
Example #6
0
def cm_plot(original_label, predict_label, kunm, pic=None):
    """Print Cohen's kappa and plot the raw confusion matrix.

    Args:
        original_label: true labels.
        predict_label: predicted labels.
        kunm: value embedded in the "cnnmatrix<kunm>.svg" file name.
        pic: optional; when given, the figure is additionally saved as
            ``str(pic) + '.svg'``.

    NOTE(review): ``savepath`` is not a parameter here, so it has to exist in
    an enclosing scope - confirm it is defined at module level.
    """
    cm = confusion_matrix(original_label, predict_label)
    print('kappa:', kappa(cm))

    # matshow() opens its own figure, so no separate plt.figure() is needed
    # (it would only leave an empty extra figure behind).
    plt.matshow(cm, cmap=plt.cm.Blues)
    plt.colorbar()

    n_rows, n_cols = cm.shape
    for row in range(n_rows):
        for col in range(n_cols):
            # matshow draws cm[row, col] at x=col, y=row, so the annotation
            # must use (col, row); (row, col) would transpose the numbers.
            plt.annotate(str(cm[row, col]),
                         xy=(col, row),
                         horizontalalignment='center',
                         verticalalignment='center')

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.title('confusion matrix')

    if pic is not None:
        plt.savefig(str(pic) + '.svg')
    plt.savefig(savepath + "cnnmatrix" + str(kunm) + ".svg")
    plt.show()
# One shaded, normalised histogram per lens, each with its percentile summary
# written in the matching colour.
for il, lens in enumerate(lenses):
    samples = H0s_list[il]
    shade = colors[il]

    h, be = np.histogram(samples, bins=nbins, density=True)
    # Bin centres: midpoint of each pair of neighbouring bin edges.
    xs = [(left + right) / 2. for left, right in zip(be[:-1], be[1:])]
    plt.plot(xs, h, alpha=0.5, color=shade, linewidth=0.0)
    plt.fill_between(
        xs, h,
        alpha=0.5, color=shade, linewidth=0.0,
        label=r'%s' % lens.longname)

    # Middle percentile with the distances to the lower and upper ones.
    pcs = np.percentile(samples, q=percentiles)
    below = pcs[1] - pcs[0]
    above = pcs[2] - pcs[1]
    txt = r'$H_{0}: $' + title.format(fmt(pcs[1]), fmt(below), fmt(above))
    plt.annotate(
        txt,
        xy=(0.0, 0.0),
        xytext=(0.02, 0.9 - 0.1 * il),
        xycoords='axes fraction',
        fontsize=18,
        color=shade)


# The combined result (last entry of H0s_list) is drawn as a solid line.
h, be = np.histogram(H0s_list[-1], bins=nbins, density=True)
xs = [(left + right) / 2. for left, right in zip(be[:-1], be[1:])]
plt.plot(xs, h, alpha=1.0, color=colors[-1], linewidth=2.0, label=r'All')

# Its summary goes one text row below the per-lens summaries.
pcs = np.percentile(H0s_list[-1], q=percentiles)
txt = r'$H_{0}: $' + title.format(
    fmt(pcs[1]), fmt(pcs[1] - pcs[0]), fmt(pcs[2] - pcs[1]))
plt.annotate(
    txt,
    xy=(0.0, 0.0),
    xytext=(0.02, 0.9 - 0.1 * len(lenses)),
    xycoords='axes fraction',
    fontsize=18,
    color=colors[-1])

Example #8
0
def main():
    """Run the genetic algorithm and, for fitness choice 6, plot the best tour.

    Prints the run parameters, builds an initial population, then repeatedly
    selects parents, crosses them over and mutates the offspring until
    ``GENERATIONS`` is exceeded or ``SOLUTION_FOUND`` is set. When
    ``FITNESS_CHOICE == 6`` the fittest individual of the final population is
    drawn as a closed tour with annotated city ids.

    Sets the module global ``SOLUTION_FOUND`` to True when the initial
    population already passes ``check_solution``.
    """
    global SOLUTION_FOUND

    print("Parameters for run: \n")
    print("Generations: \n", GENERATIONS)
    print("Number of parents: \n", NO_OF_PARENTS)
    print("Number of genes: \n", NO_OF_GENES)
    print("Mutation rate: \n", MUTATION_RATE)
    print("Crossover rate: \n", CROSSOVER_RATE)

    if FITNESS_CHOICE == 5:
        print('Knapsack List is as follows: ')
        for item in KNAPSACK:
            print("Item No: ", item, "Weight: ", KNAPSACK[item][0], "Value: ", KNAPSACK[item][1])

    gen_count = 1

    population = generate_population(POPULATION_SIZE)
    fitness = [compute_fitness(individual) for individual in population]
    # Track the fittest individual from the very first generation, so the
    # plot below also works when the loop body never executes.
    fitness_index = fitness.index(max(fitness))

    if check_solution(population):
        print("Best individual found in", gen_count, " generations")
        SOLUTION_FOUND = True
    else:
        gen_count += 1

    # NOTE(review): check_solution() is not re-evaluated inside the loop and
    # the selected parents are not carried into the next generation; both are
    # kept as in the original - confirm this is intended.
    while gen_count <= GENERATIONS and not SOLUTION_FOUND:
        parents = selection(population, fitness, NO_OF_PARENTS)
        offspring = crossover(parents, POPULATION_SIZE - NO_OF_PARENTS)
        population = [mutation(child) for child in offspring]

        fitness = [compute_fitness(individual) for individual in population]
        # Index of fittest individual
        best_fitness = max(fitness)
        fitness_index = fitness.index(best_fitness)
        best_individual = population[fitness_index]

        print("Generation: ", gen_count, " Max fitness: ", best_fitness, " Best individual: ", best_individual)

        gen_count += 1

    # Disclaimer: Graph Code taken from a friends project

    # Visualise the Travelling Salesman Problem
    if FITNESS_CHOICE == 6:
        tour = population[fitness_index]
        for i, pt1 in enumerate(tour):
            # The modulo wraps the last city back to the first one, closing
            # the tour.
            pt2 = tour[(i + 1) % len(tour)]
            plt.plot([pt1.pos[0], pt2.pos[0]], [pt1.pos[1], pt2.pos[1]])

        # Plot individual points on the 'board'
        xs, ys = zip(*[city.pos for city in tour])
        plt.scatter(xs, ys, s=40)

        for city in tour:
            # Annotate the City IDs
            plt.annotate(city.id, city.pos)

        plt.show()
Example #9
0
# %%
from itertools import cycle
# Plot configuration: number of per-class curves and the colours they use.
n_classes = 3
colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

plt.figure(figsize=(7, 8))

# Iso-F1 curves: each one joins the points of precision/recall space that
# share the same F1 score.
f_scores = np.linspace(0.2, 0.95, num=5)
x = np.linspace(0.01, 1)  # x grid shared by every iso-F1 curve
for f_score in f_scores:
    y = f_score * x / (2 * x - f_score)
    non_negative = y >= 0
    l, = plt.plot(x[non_negative], y[non_negative], color='gray', alpha=0.2)
    plt.annotate('f1={0:0.1f}'.format(f_score), xy=(0.9, y[45] + 0.02))

# A single handle (the last iso-F1 line drawn) stands for all iso-F1 curves.
lines = [l]
labels = ['iso-f1 curves']

# Micro-averaged curve.
l, = plt.plot(recall["micro"], precision["micro"], color='gold', lw=2)
lines.append(l)
labels.append(
    'micro-average Precision-recall (area = {0:0.2f})'.format(
        average_precision["micro"]))

# One curve per class, taking the colours in order.
for i, color in zip(range(n_classes), colors):
    l, = plt.plot(recall[i], precision[i], color=color, lw=2)
    lines.append(l)
    labels.append(
        'Precision-recall for class {0} (area = {1:0.2f})'.format(
            i, average_precision[i]))

fig = plt.gcf()
Example #10
0
# The plotting functions live in matplotlib.pyplot, not in the top-level
# matplotlib package.
import matplotlib.pyplot as plt
import numpy as np

# Profit/loss diagram of a covered call as a function of the stock price.
sT = np.arange(0, 40, 5)  # stock prices to evaluate
k = 15                    # exercise price of the call
s0 = 10                   # price paid for the share
c = 2                     # call premium
y0 = np.zeros(len(sT))    # zero-profit reference line
y1 = sT - s0  # stock only
y2 = (abs(sT - k) + sT - k) / 2 - c  # long a call: max(sT - k, 0) - c
y3 = y1 - y2  # covered call
plt.ylim(-10, 30)
plt.plot(sT, y1)
plt.plot(sT, y2)
plt.plot(sT, y3, "red")
plt.plot(sT, y0, "b-.")
plt.plot([k, k], [-10, 10], "black")  # vertical marker at the exercise price
plt.title("Covered call ( long one share and short one call)")
plt.xlabel("Stock price")
plt.ylabel("Profit (loss)")
plt.annotate(
    "Stock only (long one share)", xy=(24, 15), xytext=(15, 20), arrowprops=dict(facecolor="blue", shrink=0.01)
)
plt.annotate("Long one share, short a call", xy=(10, 4), xytext=(9, 25), arrowprops=dict(facecolor="red", shrink=0.01))
plt.annotate("Exercise price= " + str(k), xy=(k + 0.2, -10 + 0.5))

plt.show()
Example #11
0
'''


# Imported before first use: plt.show() below needs the pyplot interface.
import matplotlib.pylab as plt
from sklearn.cluster import KMeans

# Load the query result and release the connection straight away.
df = pandas.read_sql(query, conn)
conn.close()

pandas.set_option('display.max_columns', None)
df.head()

df = df.dropna()
df.head()

# df.plot returns an Axes object; keep it under its own name so that ``plt``
# keeps referring to the plotting module.
ax = df.plot(x = 'total_salaries', y = 'total_runs', kind = 'scatter')

# Iterate over the values directly: after dropna() the index labels may have
# gaps, so looking rows up by the running position i can fail.
for year, salaries, runs in zip(df.yearID, df.total_salaries, df.total_runs):
    ax.annotate(str(year), (salaries, runs))
plt.show()

# Scales data
data = df[['total_salaries', 'total_runs']]
scaler = preprocessing.StandardScaler()
scaler.fit(data)
data = pandas.DataFrame(scaler.transform(data), columns = data.columns)

# Kmeans
kmeans_est = KMeans(n_clusters=3)
kmeans_est.fit(data)
Example #12
0
def compute(inp_dataset, input_path, output_path, de_analysis, n_pass):

    print("Current pass ", n_pass)
    import json
    import matplotlib as plt
    import csv
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt
    from sklearn.decomposition import PCA
    from decimal import Decimal
    import seaborn as sns
    import pandas as pd
    import networkx as nx
    from sklearn.cluster import DBSCAN
    from sklearn.cluster import KMeans
    import operator
    import numpy as np
    import random
    import sys

    #csvData=[['data','x','y','type']]
    print("Processing the input data into datafames....")
    csvData = []
    count = 0
    #filename = "G:/Thesis/Dropclust/plots/output_normalized_own_cc.csv" filename = "G:/Thesis/Dropclust/plots/PCA_GENES/output_normalized_own_cc.csv" filename =
    #"G:/Thesis/Dropclust/output_normalized_zscore_cc1.csv" filename = "C:/Users/Swagatam/IdeaProjects/openOrd/output_normalized_own_cc.csv"
    filename = input_path + "/output_normalized_own_cc.csv"
    coord_data = pd.read_csv(filename, names=['data', 'x', 'y'])
    coord_data.set_index('data', inplace=True)
    data = []
    data_outlier = []
    with open(filename, 'r') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            #f=0
            #row=[float(i) for i in row]
            data.append(row)
            temp_outlier = []
            temp_outlier.append(row[1])
            temp_outlier.append(row[2])
            data_outlier.append(temp_outlier)
            temp = row
            #if row[0].isnumeric():
            #    temp.append('cell')
            if len(row[0]) >= 16:
                temp.append('cell')
            else:
                temp.append('gene')
                count = count + 1
            csvData.append(temp)

    # # DB SCAN

    # In[20]:

    if n_pass != 4:
        noise = []
        print("Performing clustering....")
        db = DBSCAN(eps=180, min_samples=55).fit_predict(data_outlier)
        final_data = []
        csvData = [['data', 'x', 'y', 'type']]
        for i in range(0, len(list(db))):
            if db[i] != -1:
                final_data.append(data[i])
                csvData.append(data[i])
            if db[i] == -1:
                noise.append(data[i][0])
        data = final_data

        n_clusters = len(set(db)) - (1 if -1 in list(db) else 0)
        print("Clustering done. the number of obtained clusters: ", n_clusters)
    else:
        remove_data = []

        prev_df = pd.read_csv(
            "Stardust_results/visualization_output/3_pass/data.csv",
            delimiter=",",
            index_col=False)
        prev_df.set_index('data', inplace=True)
        clusters_info = []
        for i in range(0, len(csvData)):
            if csvData[i][3] == 'cell':
                if csvData[i][0] in (prev_df.index):
                    clusters_info.append(prev_df.loc[csvData[i][0]]['cluster'])
                else:
                    remove_data.append(csvData[i])
            else:
                f = 0
                import pickle
                with open(
                        'Stardust_results/visualization_output/3_pass/de_genes_cluster.txt',
                        'rb') as fp:
                    de_gene_cluster = pickle.load(fp)
                for rank in range(0, len(de_gene_cluster)):
                    if csvData[i][0] in de_gene_cluster[rank]:
                        f = 1
                        clusters_info.append(de_gene_cluster[rank].index(
                            csvData[i][0]))
                        break
                if f == 0:
                    remove_data.append(csvData[i])
        for r in remove_data:
            csvData.remove(r)
        temp = [['data', 'x', 'y', 'type']]
        temp.extend(csvData)
        csvData = temp

    # In[13]:

    # # OUTLIER VISUALIZATION

    # In[21]:
    if n_pass != 4:
        print("Starting outlier detection....")
        data_type = []
        c = 0
        g = 0
        for i in range(0, len(coord_data)):
            if db[i] != -1:
                data_type.append("data")
            else:
                if len(coord_data.index[i]) >= 16:
                    data_type.append("cell_outliers")
                else:
                    g = g + 1
                    data_type.append("gene_outliers")
        coord_data["data_type"] = data_type
        data_colors = ["lightblue"]
        if g > 0:
            noise_colors = ['blue', 'red']
        else:
            noise_colors = ['blue']
        coord_data["alpha"] = np.where(coord_data['data_type'] == 'data', 0.5,
                                       1.0)
        plt.figure(figsize=(6, 4.5))
        #ax = sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==0.5],hue="data_type",palette=sns.xkcd_palette(data_colors),sizes=(50,100),size="data_type",alpha=0.3)
        #sns.scatterplot(x="x", y="y", data=coord_data[coord_data['alpha']==1.0],hue="data_type",palette=sns.xkcd_palette(noise_colors),sizes=(50,100),size="data_type",marker="^",alpha=1.0,ax=ax)
        marker = {"gene_outliers": "^", "cell_outliers": "^"}
        ax = sns.scatterplot(x="x",
                             y="y",
                             data=coord_data[coord_data['alpha'] == 0.5],
                             hue="data_type",
                             palette=sns.xkcd_palette(data_colors),
                             sizes=(50, 100),
                             size="data_type",
                             linewidth=0.0,
                             s=10,
                             alpha=0.3)
        sns.scatterplot(x="x",
                        y="y",
                        data=coord_data[coord_data['alpha'] == 1.0],
                        hue="data_type",
                        palette=sns.xkcd_palette(noise_colors),
                        sizes=(100, 50),
                        size="data_type",
                        style="data_type",
                        markers=marker,
                        alpha=1.0,
                        linewidth=0.0,
                        s=10,
                        legend='brief',
                        ax=ax)
        #plt.legend(title=='')
        ax.legend(bbox_to_anchor=(1.1, 1.05), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("dim1")
        plt.ylabel("dim2")
        plt.savefig(output_path + 'outliers_visualization.png',
                    bbox_inches='tight')
        print("Outliers removed from the dataset....")

    # # POST-HOC CLUSTER ASSIGNMENT

    # In[23]:

    print("Starting post hoc clustering....")
    neighbor_df = pd.read_hdf(
        'Stardust_results/build_output/1_pass/neighbor.h5', 'df')
    if 'Unnamed: 0' in list(neighbor_df.columns):
        neighbor_df.set_index('Unnamed: 0', inplace=True)
    p = 0
    col = list(neighbor_df.columns)
    index = list(neighbor_df.index)
    cell_dict = dict()
    column_dict = dict()
    for i in range(len(col)):
        column_dict[i] = col[i]
    for i in range(len(list(neighbor_df.index))):
        row = neighbor_df.iloc[i]
        col_ind = list(row.to_numpy().nonzero())[0]
        for ind in col_ind:
            if index[i] in cell_dict.keys():
                cell_dict[index[i]].append(column_dict[ind])
            else:
                temp = []
                temp.append(column_dict[ind])
                cell_dict[index[i]] = temp
    cluster_assign = []
    for key_cell in cell_dict.keys():
        clust = dict()
        cells = cell_dict[key_cell]
        for cell in cells:
            if n_pass == 4:
                if cell in list(prev_df.index):
                    cluster = prev_df.loc[cell]['cluster']
                else:
                    cluster = -1
            else:
                cluster = db[list(coord_data.index).index(cell)]
            if cluster not in clust.keys():
                clust[cluster] = 1
            else:
                clust[cluster] = clust[cluster] + 1
        max_cluster = max(clust.items(), key=operator.itemgetter(1))[0]
        if max_cluster == -1:
            continue
        cluster_assign.append(max_cluster)
        x_total = 0
        y_total = 0
        count = 0
        for cell in cells:
            if (n_pass != 4
                    and db[list(coord_data.index).index(cell)] == max_cluster
                ) or (n_pass == 4 and cell in list(prev_df.index)
                      and prev_df.loc[cell]['cluster'] == max_cluster):
                count = count + 1
                x_total = x_total + coord_data.loc[cell]['x']
                y_total = y_total + coord_data.loc[cell]['y']
        temp = []
        temp.append(key_cell)
        temp.append(x_total / count)
        temp.append(y_total / count)
        temp.append('cell')
        p = p + 1
        csvData.append(temp)
    print("Post hoc clustering done....")

    # In[24]:

    with open(output_path + 'data.csv', 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(csvData)
    csvFile.close()
    data_df = pd.read_csv(output_path + "data.csv",
                          delimiter=",",
                          index_col=False)
    if n_pass != 4:
        clusters_info = [x for x in db if x != -1]
        clusters_info = clusters_info + cluster_assign
    else:
        clusters_info = clusters_info + cluster_assign
        data_df['cluster'] = clusters_info
    data_df.to_csv(output_path + 'data.csv')
    n_clusters = len(list(set(clusters_info)))
    print("cluster saved ....")

    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)

    colors = random.sample(seaborn_colors, n_clusters)
    plt.figure(figsize=(5, 5))
    #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=data_df,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         linewidth=0.0,
                         s=2)
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    for cl in range(n_clusters):
        plt.annotate(cl,
                     data_df.loc[data_df['cluster'] == cl, ['x', 'y']].mean(),
                     horizontalalignment='center',
                     verticalalignment='center',
                     size=10,
                     weight='bold',
                     color="black")
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "cluster_visualization.png",
                bbox_inches='tight',
                dpi=600)
    plt.savefig(output_path + "cluster_visualization.pdf",
                bbox_inches='tight',
                dpi=600)

    if n_pass == 3:
        from sklearn.datasets import make_blobs
        from sklearn.metrics import silhouette_samples, silhouette_score
        silhouette_avg = silhouette_score(data_df[['x', 'y']],
                                          data_df['cluster'])
        sample_silhouette_values = silhouette_samples(data_df[['x', 'y']],
                                                      data_df['cluster'])
        print(silhouette_avg)

        y_lower = 10
        import matplotlib.cm as cm
        #fig, (ax1, ax2) = plt.subplots(1, 2)
        fig = plt.figure(figsize=(4, 7))
        #fig.set_size_inches(18, 7)
        for i in range(n_clusters):
            # Aggregate the silhouette scores for samples belonging to
            # cluster i, and sort them
            ith_cluster_silhouette_values = \
                sample_silhouette_values[data_df['cluster'] == i]

            ith_cluster_silhouette_values.sort()

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = cm.nipy_spectral(float(i) / n_clusters)
            plt.fill_betweenx(np.arange(y_lower, y_upper),
                              0,
                              ith_cluster_silhouette_values,
                              facecolor=color,
                              edgecolor=color,
                              alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            plt.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples

        plt.title("The silhouette plot for the various clusters.")
        plt.xlabel("silhouette coefficient", fontsize=20)
        plt.ylabel("Cluster label", fontsize=20)
        plt.axvline(x=silhouette_avg, color="red", linestyle="--")

        plt.yticks([])  # Clear the yaxis labels / ticks
        plt.xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
        sns.despine(bottom=False, left=False)
        fig.savefig(output_path + "/silhouette.pdf",
                    bbox_inches='tight',
                    dpi=600)
        fig.savefig(output_path + "/silhouette.png",
                    bbox_inches='tight',
                    dpi=600)

    #  #  MARKER FINDING
    data_df = pd.read_csv(output_path + "data.csv",
                          delimiter=",",
                          index_col=False)
    data_df.set_index('data', inplace=True)
    import pickle
    if n_pass == 2:
        path = 'Stardust_results/visualization_output/1_pass'
    if n_pass == 3:
        path = 'Stardust_results/visualization_output/2_pass'
    if n_pass == 4:
        path = 'Stardust_results/visualization_output/3_pass'
    if n_pass != 1:
        with open(path + '/de_genes_cluster.txt', 'rb') as fp:
            de_gene_cluster = pickle.load(fp)

        marker = []
        disp_marker = []
        for cl in range(n_clusters):
            cls = data_df[data_df['cluster'] == cl]
            gene_df = cls[cls['type'] == 'gene']
            f = 0
            for rank in range(len(de_gene_cluster)):
                if f == 1:
                    break
                for gene in de_gene_cluster[rank]:
                    if gene in list(gene_df.index):
                        disp_marker.append(gene)
                        #print(cl)
                        f = 1
                        break
        marker = disp_marker

        #sys.exit(0)

    # # CELL GENE MARKER

    # In[28]:
    from sklearn.neighbors import KNeighborsRegressor
    prev_pass_data = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data_openOrd.csv')
    prev_pass_data.set_index('data', inplace=True)
    data_df = pd.read_csv(output_path + '/data.csv')
    data_df.set_index('data', inplace=True)
    gene_df = data_df[data_df['type'] == 'gene']
    x_gene_fit = list(gene_df['x'])
    y_gene_fit = list(gene_df['y'])
    cells = list(prev_pass_data.index)
    cell_list = []
    x_coord = []
    y_coord = []

    for i in range(len(cells)):
        if cells[i] in list(data_df.index):
            cell_list.append(cells[i])
            x_coord.append(prev_pass_data.iloc[i]['x'])
            y_coord.append(prev_pass_data.iloc[i]['y'])

    prev_df = pd.DataFrame(index=cell_list)
    prev_df['x'] = x_coord
    prev_df['y'] = y_coord

    import numpy as np
    from sklearn.linear_model import Lasso
    from sklearn.neighbors import KNeighborsRegressor
    import pickle
    cells = []
    genes = []
    gene_coord_x = []
    gene_coord_y = []

    for i in range(n_clusters):
        clust_data = data_df[data_df['cluster'] == i]
        clust_cells = clust_data[clust_data['type'] == 'cell']
        clust_genes = clust_data[clust_data['type'] == 'gene']
        cells.extend(list(clust_cells.index))
        genes.extend(list(clust_genes.index))
        if len(list(clust_genes.index)) == 0:
            continue
        model1 = KNeighborsRegressor(n_neighbors=4)

        model2 = KNeighborsRegressor(n_neighbors=4)
        temp = []
        for cell in list(clust_cells.index):
            if cell in list(prev_df.index):
                temp.append(cell)
        clust_cells = clust_cells.loc[temp]
        model1.fit(
            np.array(list(clust_cells['x'])).reshape((-1, 1)),
            np.array(list(prev_df.loc[list(clust_cells.index)]['x'])).reshape(
                (-1, 1)))

        filename = output_path + '/sd_x_KNN_model.sav'
        pickle.dump(model1, open(filename, 'wb'))
        #model1 = pickle.load(open(filename, 'rb'))
        x_gene_pred = model1.predict(
            np.array(list(clust_genes['x'])).reshape((-1, 1)))
        gene_coord_x.extend(x_gene_pred)
        model2.fit(
            np.array(list(clust_cells['y'])).reshape((-1, 1)),
            np.array(list(prev_df.loc[list(clust_cells.index)]['y'])).reshape(
                (-1, 1)))

        filename = output_path + '/sd_y_KNN_model.sav'
        pickle.dump(model2, open(filename, 'wb'))
        #model2 = pickle.load(open(filename, 'rb'))
        y_gene_pred = model2.predict(
            np.array(list(clust_genes['y'])).reshape((-1, 1)))
        gene_coord_y.extend(y_gene_pred)

    with open(output_path + "/sd_gene_coord_x.txt", 'wb') as fp:
        pickle.dump(gene_coord_x, fp)
    with open(output_path + "/sd_gene_coord_y.txt", 'wb') as fp:
        pickle.dump(gene_coord_y, fp)

    #with open (output_path+"/sd_gene_coord_x.txt", 'rb') as fp:
    #        gene_coord_x = pickle.load(fp)
    #with open (output_path+"/sd_gene_coord_y.txt", 'rb') as fp:
    #        gene_coord_y = pickle.load(fp)

    import matplotlib.pyplot as plt, mpld3
    from scipy.spatial import ConvexHull, convex_hull_plot_2d
    prev_pass_data = pd.read_csv(
        'Stardust_results/visualization_output/3_pass/data_openOrd.csv')
    prev_pass_data["alpha"] = np.where(prev_pass_data['type'] == 'gene', 1.0,
                                       0.5)
    color_gene = ["light blue"]
    color_cell = ["red"]
    #fig,ax1 = plt.subplots()
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=prev_pass_data[prev_pass_data['alpha'] == 0.5],
                         hue="type",
                         palette=sns.xkcd_palette(color_gene),
                         sizes=(10, 5),
                         size="type",
                         alpha=0.3,
                         s=10)
    #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10)
    sns.scatterplot(x=gene_coord_x,
                    y=gene_coord_y,
                    palette=sns.xkcd_palette(color_cell),
                    sizes=(20, 5),
                    marker="^",
                    alpha=1.0,
                    ax=ax,
                    s=10)
    for c in range(n_clusters):
        p = data_df[data_df["cluster"] == c]
        p = p[['x', 'y']]
        points = p.values
        hull = ConvexHull(points)
        #for simplex in hull.simplices:
    #    sns.lineplot(points[simplex, 0], points[simplex, 1])
    x_list = []
    y_list = []
    if n_pass != 1:
        for m in marker:
            #x_list.append(data_df.loc[m]['x'])
            x_list.append(gene_coord_x[genes.index(m)])
            #y_list.append(data_df.loc[m]['y'])
            y_list.append(gene_coord_y[genes.index(m)])
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "sd_embedding.png", bbox_inches='tight', dpi=600)
    plt.savefig(output_path + "sd_embedding.pdf", bbox_inches='tight', dpi=600)

    import matplotlib.pyplot as plt, mpld3
    from scipy.spatial import ConvexHull, convex_hull_plot_2d
    #data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
    prev_pass_data.set_index('data', inplace=True)
    temp_data = prev_pass_data[prev_pass_data['type'] == 'cell']
    temp_genes = data_df[data_df['type'] == 'gene']
    for pos in range(0, len(genes)):
        temp_genes.at[genes[pos], 'x'] = gene_coord_x[pos]
        temp_genes.at[genes[pos], 'y'] = gene_coord_y[pos]
    temp_data.append(temp_genes)
    color_gene = ["light blue"]
    color_cell = ["red"]
    n_clusters = len(data_df['cluster'].unique())
    colors = random.sample(seaborn_colors, n_clusters)
    #fig,ax1 = plt.subplots()
    plt.figure(figsize=(6, 6))
    ax = sns.scatterplot(x="x",
                         y="y",
                         data=temp_data,
                         hue="cluster",
                         palette=sns.xkcd_palette(colors),
                         s=2,
                         linewidth=0.0)
    #sns.scatterplot(x="x", y="y", data=data_df[data_df['alpha']==1.0],hue="type",palette=sns.xkcd_palette(color_cell),sizes=(20,5),size="type",marker="^",alpha=1.0,ax=ax,s=10)
    #sns.scatterplot(x=gene_coord_x, y=gene_coord_y,palette=sns.xkcd_palette(color_cell),sizes=(20,5),marker="^",alpha=1.0,ax=ax,s=20)
    for c in range(n_clusters):
        p = data_df[data_df["cluster"] == c]
        p = p[['x', 'y']]
        points = p.values
        hull = ConvexHull(points)
        #for simplex in hull.simplices:
    #    sns.lineplot(points[simplex, 0], points[simplex, 1])
    x_list = []
    y_list = []
    d1 = prev_pass_data[prev_pass_data['alpha'] == 0.5]
    for cl in range(n_clusters):
        plt.annotate(cl,
                     d1.loc[d1['cluster'] == cl, ['x', 'y']].mean(),
                     horizontalalignment='center',
                     verticalalignment='center',
                     size=10,
                     weight='bold',
                     color="black")
    if n_pass != 1:
        for m in marker:
            #x_list.append(data_df.loc[m]['x'])
            x_list.append(gene_coord_x[genes.index(m)])
            #y_list.append(data_df.loc[m]['y'])
            y_list.append(gene_coord_y[genes.index(m)])
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
    ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
    sns.despine(bottom=False, left=False)
    plt.xlabel("sd1", fontsize=20)
    plt.ylabel("sd2", fontsize=20)
    plt.setp(ax.spines.values(), linewidth=2)
    plt.yticks([], linewidth=20)
    plt.xticks([])
    plt.savefig(output_path + "sd_color_embedding.png",
                bbox_inches='tight',
                dpi=600)
    plt.savefig(output_path + "sd_color_embedding.pdf",
                bbox_inches='tight',
                dpi=600)
    #sys.exit(0)
    # # UMAP CELL GENE MARKER # #

    if n_pass == 4:

        import pickle
        with open('Stardust_results/build_output/1_pass/umap_coord.txt',
                  'rb') as fp:
            umap_coord = pickle.load(fp)
        louvain_df = pd.read_csv(
            'Stardust_results/build_output/1_pass/louvain_cluster_df.csv')
        louvain_df.set_index('Unnamed: 0', inplace=True)
        #data_df = pd.read_csv('F:/output/output_visualize_melanoma_pca/3rd_pass/data.csv')
        data_df = pd.read_csv(output_path + '/data.csv')
        data_df.set_index('data', inplace=True)
        gene_df = data_df[data_df['type'] == 'gene']
        x_gene_fit = list(gene_df['x'])
        y_gene_fit = list(gene_df['y'])
        cells = list(louvain_df.index)
        cell_list = []
        x_coord = []
        y_coord = []
        for i in range(len(cells)):
            if cells[i] in list(data_df.index):
                cell_list.append(cells[i])
                x_coord.append(umap_coord[i][0])
                y_coord.append(umap_coord[i][1])
        umap_df = pd.DataFrame(index=cell_list)
        umap_df['x'] = x_coord
        umap_df['y'] = y_coord

        import numpy as np
        from sklearn.linear_model import Lasso
        from sklearn.neighbors import KNeighborsRegressor
        import pickle
        cells = []
        genes = []
        gene_coord_x = []
        gene_coord_y = []
        for i in range(n_clusters):
            clust_data = data_df[data_df['cluster'] == i]
            clust_cells = clust_data[clust_data['type'] == 'cell']
            clust_genes = clust_data[clust_data['type'] == 'gene']
            cells.extend(list(clust_cells.index))
            genes.extend(list(clust_genes.index))
            if len(list(clust_genes.index)) == 0:
                continue
            model1 = KNeighborsRegressor(n_neighbors=5)

            model2 = KNeighborsRegressor(n_neighbors=5)

            model1.fit(
                np.array(list(clust_cells['x'])).reshape((-1, 1)),
                np.array(list(umap_df.loc[list(
                    clust_cells.index)]['x'])).reshape((-1, 1)))

            filename = output_path + '/scanpy_x_KNN_model.sav'
            pickle.dump(model1, open(filename, 'wb'))
            #model1 = pickle.load(open(filename, 'rb'))
            x_gene_pred = model1.predict(
                np.array(list(clust_genes['x'])).reshape((-1, 1)))
            gene_coord_x.extend(x_gene_pred)
            model2.fit(
                np.array(list(clust_cells['y'])).reshape((-1, 1)),
                np.array(list(umap_df.loc[list(
                    clust_cells.index)]['y'])).reshape((-1, 1)))

            filename = output_path + '/scanpy_y_KNN_model.sav'
            pickle.dump(model2, open(filename, 'wb'))
            #model2 = pickle.load(open(filename, 'rb'))
            y_gene_pred = model2.predict(
                np.array(list(clust_genes['y'])).reshape((-1, 1)))
            gene_coord_y.extend(y_gene_pred)

        with open(output_path + "/scanpy_gene_coord_x.txt", 'wb') as fp:
            pickle.dump(gene_coord_x, fp)
        with open(output_path + "/scanpy_gene_coord_y.txt", 'wb') as fp:
            pickle.dump(gene_coord_y, fp)

        #with open (output_path+"/scanpy_gene_coord_x.txt", 'rb') as fp:
        #    gene_coord_x = pickle.load(fp)
        #with open (output_path+"/scanpy_gene_coord_y.txt", 'rb') as fp:
        #    gene_coord_y = pickle.load(fp)

        #n_clusters = len(list(data_df['cluster'].unique()))

        u_map_x = []
        u_map_y = []
        for ind in list(data_df.index):
            if ind in list(louvain_df.index):

                u_map_x.append(umap_coord[list(
                    louvain_df.index).index(ind)][0])
                u_map_y.append(umap_coord[list(
                    louvain_df.index).index(ind)][1])
            else:
                u_map_x.append(gene_coord_x[genes.index(ind)])
                u_map_y.append(gene_coord_y[genes.index(ind)])
        data_df['umap_x'] = u_map_x
        data_df['umap_y'] = u_map_y

        #        colors = random.sample(seaborn_colors,n_clusters)
        #colors = colors3
        plt.figure(figsize=(5, 5))
        #cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df,
                             hue="cluster",
                             palette=sns.xkcd_palette(colors),
                             linewidth=0.0,
                             s=2)
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_clustering.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_clustering.pdf',
                    bbox_inches='tight',
                    dpi=600)

        import matplotlib.pyplot as plt, mpld3
        from scipy.spatial import ConvexHull, convex_hull_plot_2d
        data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
        color_gene = ["light grey"]
        color_cell = ["red"]
        #fig,ax1 = plt.subplots()
        plt.figure(figsize=(6, 6))

        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="type",
                             palette=sns.xkcd_palette(color_gene),
                             sizes=(10, 5),
                             size="type",
                             alpha=0.3,
                             s=10)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        sizes=(20, 5),
                        size="type",
                        marker="^",
                        alpha=1.0,
                        ax=ax,
                        s=10)
        for c in range(n_clusters):
            p = data_df[data_df["cluster"] == c]
            p = p[['umap_x', 'umap_y']]
            points = p.values
            hull = ConvexHull(points)
            #for simplex in hull.simplices:
            #    sns.lineplot(points[simplex, 0], points[simplex, 1])
        x_list = []
        y_list = []
        for m in marker:
            x_list.append(data_df.loc[m]['umap_x'])
            #x_list.append(gene_coord_x[genes.index(m)])
            y_list.append(data_df.loc[m]['umap_y'])
            #y_list.append(gene_coord_y[genes.index(m)])
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_embedding.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_embedding.pdf',
                    bbox_inches='tight',
                    dpi=600)

        import matplotlib.pyplot as plt, mpld3
        from scipy.spatial import ConvexHull, convex_hull_plot_2d
        data_df["alpha"] = np.where(data_df['type'] == 'gene', 1.0, 0.5)
        color_gene = ["light grey"]
        color_cell = ["red"]
        #fig,ax1 = plt.subplots()
        plt.figure(figsize=(6, 6))
        #       colors = color
        ax = sns.scatterplot(x="umap_x",
                             y="umap_y",
                             data=data_df[data_df['alpha'] == 0.5],
                             hue="cluster",
                             linewidth=0.0,
                             sizes=(2, 5),
                             size="type",
                             palette=sns.xkcd_palette(colors),
                             s=2)
        sns.scatterplot(x="umap_x",
                        y="umap_y",
                        data=data_df[data_df['alpha'] == 1.0],
                        hue="type",
                        palette=sns.xkcd_palette(color_cell),
                        linewidth=0.1,
                        marker="^",
                        ax=ax,
                        alpha=1.0,
                        s=10)
        for c in range(n_clusters):
            p = data_df[data_df["cluster"] == c]
            p = p[['umap_x', 'umap_y']]
            points = p.values
            hull = ConvexHull(points)
            #for simplex in hull.simplices:
            #    sns.lineplot(points[simplex, 0], points[simplex, 1])
        x_list = []
        y_list = []
        for m in marker:
            x_list.append(data_df.loc[m]['umap_x'])
            y_list.append(data_df.loc[m]['umap_y'])
        for cl in range(n_clusters):
            plt.annotate(cl,
                         data_df.loc[data_df['cluster'] == cl,
                                     ['umap_x', 'umap_y']].mean(),
                         horizontalalignment='center',
                         verticalalignment='center',
                         size=10,
                         weight='bold',
                         color="black")
        for label, x, y in zip(marker, x_list, y_list):
            plt.annotate(
                label,
                xy=(x, y),
                xytext=(-20, 20),
                textcoords='offset points',
                ha='right',
                va='bottom',
                #bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
        ax.legend(bbox_to_anchor=(1.0, 1.00), frameon=False)
        sns.despine(bottom=False, left=False)
        plt.xlabel("umap1", fontsize=20)
        plt.ylabel("umap2", fontsize=20)
        plt.setp(ax.spines.values(), linewidth=2)
        plt.yticks([], linewidth=20)
        plt.xticks([])
        plt.savefig(output_path + 'umap_color_embedding.png',
                    bbox_inches='tight',
                    dpi=600)
        plt.savefig(output_path + 'umap_color_embedding.pdf',
                    bbox_inches='tight',
                    dpi=600)
# Hand-picked 2-D coordinates, one row per entry of ``words`` (same order).
words = ['queen', 'book', 'king', 'magazine', 'car', 'bike']
vectors = np.array([
    [0.1, 0.3],  # queen
    [-0.5, -0.1],  # book
    [0.2, 0.2],  # king
    [-0.3, -0.2],  # magazine
    [-0.5, 0.4],  # car
    [-0.45, 0.3],  # bike
])

# Plot every word as a point and label the point with the word itself.
plt.plot(vectors[:, 0], vectors[:, 1], 'o')
plt.xlim(-0.6, 0.3)
plt.ylim(-0.3, 0.5)
for word, x, y in zip(words, vectors[:, 0], vectors[:, 1]):
    plt.annotate(word, (x, y), size=12)
# The stray bare ``zip(...)`` expression that followed the loop was removed:
# its result was discarded, so it had no effect when run as a script.
plt.show()

# Bar chart of hard-coded example values, one bar per word; the y-axis label
# presents them as P(w | Cinderella).
s = pd.Series([0.1, 0.4, 0.01, 0.2, 0.05],
              index=["pumpkin", "shoe", "tree", "prince", "luck"])
s.plot(kind='bar')
plt.ylabel("$P(w|Cinderella)$")
plt.show()

###########################################################################################
# Intro to Word2Vec  - Actual
###########################################################################################

# example data
sentences = [
Example #14
0
# Profit/loss diagram for a covered call (long one share, short one call).
# NOTE(review): ``sT`` and ``k`` are defined outside this excerpt; from their
# use below they look like an array of terminal stock prices and the exercise
# price -- confirm against the code that precedes this snippet.
s0 = 10  # subtracted from sT to get the stock-only profit
c = 2  # subtracted from the call payoff (presumably the call premium)
y0 = np.zeros(len(sT))  # zero-profit reference line, same length as sT
y1 = sT - s0  # stock only
# (|x| + x) / 2 equals max(x, 0), so this is max(sT - k, 0) - c.
y2 = (abs(sT - k) + sT - k) / 2 - c  # long a call
y3 = y1 - y2  # covered call
plt.ylim(-10, 30)
plt.plot(sT, y1)
plt.plot(sT, y2)
plt.plot(sT, y3, 'red')
# Fixed: the original plotted against the undefined name ``sY``; y0 is sized
# from sT, so the zero line belongs on the sT axis.
plt.plot(sT, y0, 'b-.')
plt.plot([k, k], [-10, 10], 'black')  # vertical marker at the exercise price
plt.title('Covered call ( long one share and short one call)')
plt.xlabel('Stock price')
plt.ylabel('Profit (loss)')
plt.annotate(
    'Stock only (long one share)',
    xy=(24, 15),
    xytext=(15, 20),
    arrowprops=dict(facecolor='blue', shrink=0.01),
)
plt.annotate(
    'Long one share, short a call',
    xy=(10, 4),
    xytext=(9, 25),
    arrowprops=dict(facecolor='red', shrink=0.01),
)
plt.annotate('Exercise price= ' + str(k), xy=(k + 0.2, -10 + 0.5))

plt.show()
Example #15
0
def main():
    """Plot retrieval mAP against PCA dimension using Mahalanobis distance.

    For each PCA size in ``M_pca_list`` and for both the original and the
    normalized feature vectors, the function:

    1. fits PCA on the training split and projects the test split,
    2. estimates the covariance of the projected training data and takes
       its pseudo-inverse,
    3. builds a 200-nearest-neighbour index over the projected test data
       (queried with the same test data) under the Mahalanobis metric,
    4. passes the neighbours to ``calculate_map`` and records the first
       element of its result as the mAP.

    The two mAP curves are then plotted and the process exits through
    ``sys.exit()``.

    Relies on ``load_face_data``, ``partition_data``,
    ``get_original_normalized_feature_vectors`` and ``calculate_map``,
    which are defined elsewhere.
    """
    #### LOAD FACE DATA
    face_data, face_label = load_face_data('face(1).mat')

    #### PARTITION DATA INTO TRAIN AND TEST SET
    X_train, X_test, Y_train, Y_test = partition_data(face_data, face_label, show='no')

    #### OBTAIN ORIGINAL AND NORMALIZED FEATURE VECTORS
    original_train, norm_train = get_original_normalized_feature_vectors(X_train, show='no')
    original_test, norm_test = get_original_normalized_feature_vectors(X_test, show='no')

    #### DIMENSION REDUCTION MAHALANOBIS
    # The three lists below are index-aligned: 0 = original, 1 = normalized.
    method_name = ['Maha Original', 'Maha Norm']
    test_datas = [original_test.T, norm_test.T]
    train_datas = [original_train.T, norm_train.T]
    M_pca_list = [16, 32, 64, 128, 256]
    recall_levels = 11

    mAP_original_list = []
    mAP_norm_list = []
    mAP_lists = [mAP_original_list, mAP_norm_list]

    for M_pca in M_pca_list:
        for variant, (train_data, test_data) in enumerate(zip(train_datas, test_datas)):
            pca = PCA(n_components=M_pca)               # Number of components for reduction
            Xtrain_pca = pca.fit_transform(train_data)  # Dimension reduction for train data
            Xtest_pca = pca.transform(test_data)        # Dimension reduction for test data

            S = np.cov(Xtrain_pca.T)                    # Covariance of the reduced train data
            S_inv = np.linalg.pinv(S)                   # Covariance matrix pseudo-inverse

            # scipy's distance.mahalanobis takes the INVERSE covariance (VI).
            # The original passed S itself and never used S_inv. Binding VI
            # as a default pins the matrix for this iteration.
            def mahalanobis(u, v, VI=S_inv):
                return distance.mahalanobis(u, v, VI)

            maha = NearestNeighbors(n_neighbors=200, metric=mahalanobis)
            maha.fit(Xtest_pca)
            maha_nbrs = np.asarray(maha.kneighbors(Xtest_pca))
            # Only the first element of the result is used here.
            mean_ap, _ = calculate_map(maha_nbrs, Y_test, recall_levels)  # Calculate mAP
            print(method_name[variant], ", Mpca =", M_pca, ", mAP:", mean_ap)

            mAP_lists[variant].append(mean_ap)

        print("								")

    x1 = list(M_pca_list)
    y1 = mAP_original_list
    y2 = mAP_norm_list

    plt.figure(figsize=(8, 8))
    plt.plot(x1, y1, color='red', label='Original Data', marker='o')
    plt.plot(x1, y2, color='green', label='Norm Data', marker='o')
    plt.grid(color='black', linestyle='-', linewidth=0.1)  # parameters for plot grid
    title_name = str('Reduced Dimension Mahalanobis mAP')
    plt.title(title_name).set_position([0.5, 1.05])
    plt.xlabel('Mpca')
    plt.ylabel('mAP')
    plt.legend(loc='best')
    # Label every data point with its mAP value.
    for x, value in zip(x1, y1):
        plt.annotate(value, (x, value))
    for x, value in zip(x1, y2):
        plt.annotate(value, (x, value))
    #plt.savefig(title_name)
    plt.show()
    plt.close()

    sys.exit()
Example #16
0
# Log-log plot of the N* series against the x* series (each x divided by 10).
# The sigma series share colour c1 and the gamma series share colour c2;
# within a colour the marker shape tells the series apart.
# NOTE(review): the x*/N* arrays and c1/c2 are defined outside this excerpt.
plt.loglog(xr25/10,Nr25,'^',markersize=8,color=c1,label=r'$\sigma=0.25h_0$')
plt.loglog(xr30/10,Nr30,'v',markersize=8,color=c1,label=r'$\sigma=0.30h_0$')
plt.loglog(xr40/10,Nr40,'<',markersize=8,color=c1,label=r'$\sigma=0.40h_0$')
plt.loglog(xr50/10,Nr50,'>',markersize=8,color=c1,label=r'$\sigma=0.50h_0$')
plt.loglog(xg2/10,Ng2,'d',markersize=8,color=c2,label=r'$\gamma=0.2l_p$')
plt.loglog(xg4/10,Ng4,'o',markersize=8,color=c2,label=r'$\gamma=0.4l_p$')
plt.loglog(xg10/10,Ng10,'s',markersize=8,color=c2,label=r'$\gamma=l_p$')
plt.loglog(xg20/10,Ng20,'^',markersize=8,color=c2,label=r'$\gamma=2l_p$')
plt.loglog(xg40/10,Ng40,'v',markersize=8,color=c2,label=r'$\gamma=4l_p$')
plt.loglog(xg100/10,Ng100,'<',markersize=8,color=c2,label=r'$\gamma=10l_p$')
plt.loglog(xg1000/10,Ng1000,'>',markersize=8,color=c2,label=r'$\gamma=100l_p$')
plt.legend(loc='upper right',prop={'size':17})
plt.xticks((10**0, 10**1, 10**2), (r'$10^{0}$', r'$10^{1}$', r'$10^{2}$'), fontsize=24)
plt.yticks((10**0, 10**1, 10**2, 10**3), (r'$10^{0}$', r'$10^{1}$', r'$10^{2}$', r'$10^{3}$'), fontsize=24)
ax = plt.gca()
# Tick styling. Fixed: direction='bottom' is not a valid tick direction (the
# documented values are 'in', 'out' and 'inout') and current matplotlib
# rejects it; 'out' is used instead.
# NOTE(review): the set_tick_params calls pass no ``which``, so they presumably
# target major ticks only, and the later width=1 pair overrides the earlier
# width=2 pair -- confirm this is the intended final styling.
ax.tick_params(axis='both',reset=False,which='both',length=10,width=2,direction='out')
ax.yaxis.set_tick_params(length=20,width=2,direction='out')
ax.xaxis.set_tick_params(length=20,width=2,direction='out')
ax.tick_params(axis='both',reset=False,which='minor',length=10,width=1,direction='out')
ax.xaxis.set_tick_params(length=20,width=1,direction='out')
ax.yaxis.set_tick_params(length=20,width=1,direction='out')
plt.annotate(r'$N_{LSA}=173$', fontsize=24, xy=(6, 173), xytext=(9,173), horizontalalignment='left', verticalalignment='center', arrowprops=dict(facecolor='black', shrink=0.05))
plt.xlabel(r'$\mathit{L_w/l_p}$',fontsize=24)
plt.ylabel(r'$\mathit{N(L_{w})}$',fontsize=24)
# Thick black segment between (3, 80) and (80, 3), with its endpoints marked.
# ``scatter`` and ``plot`` are bare names that must already be in scope,
# e.g. via a pylab star-import outside this excerpt.
l1 = [3,80]
l2 = [80,3]
scatter(l1,l2)
plot(l1,l2,color='black',linewidth=4)
# Re-apply the y tick labels after the extra artists were added.
plt.yticks((10**0, 10**1, 10**2, 10**3), (r'$10^{0}$', r'$10^{1}$', r'$10^{2}$', r'$10^{3}$'), fontsize=24)