Code example #1
File: LeaveOutAlloyLWR.py Project: JeritGeorge/DBTT
def loalwr(model=KernelRidge(alpha=.00518, coef0=1, degree=3, gamma=.518, kernel='laplacian', kernel_params=None),
          datapath="../../DBTT_Data.csv", lwr_datapath = "../../CD_LWR_clean.csv", savepath='../../{}.png',
          X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)","N(Si)", "N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"],
          Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    lwr_data = data_parser.parse(lwr_datapath)
    lwr_data.set_x_features(X)
    lwr_data.set_y_feature(Y)

    rms_list = []
    alloy_list = []

    for alloy in range(1, 60):
        model = model  # note: this is a no-op, not a fresh copy; the same estimator is refit each iteration

        # fit model to all alloys except the one to be removed
        data.remove_all_filters()
        data.add_exclusive_filter("Alloy", '=', alloy)
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        # predict removed alloy
        lwr_data.remove_all_filters()
        lwr_data.add_inclusive_filter("Alloy", '=', alloy)
        if len(lwr_data.get_x_data()) == 0: continue  # skip alloys absent from the LWR set (x data is empty)
        Ypredict = model.predict(lwr_data.get_x_data())

        rms = np.sqrt(mean_squared_error(Ypredict, np.asarray(lwr_data.get_y_data()).ravel()))
        rms_list.append(rms)
        alloy_list.append(alloy)

    print('Mean RMSE: ', np.mean(rms_list))


    # graph rmse vs alloy
    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots(figsize=(10, 4))
    plt.xticks(np.arange(0, max(alloy_list) + 1, 5))
    ax.scatter(alloy_list, rms_list, color='black', s=10)
    ax.plot((0, 59), (0, 0), ls="--", c=".3")
    ax.set_xlabel('Alloy Number')
    ax.set_ylabel('RMSE (MPa)')
    ax.set_title('Leave out Alloy LWR')
    ax.text(.05, .88, 'Mean RMSE: {:.2f}'.format(np.mean(rms_list)), fontsize=14, transform=ax.transAxes)
    for x in np.argsort(rms_list)[-5:]:
        ax.annotate(s=alloy_list[x], xy=(alloy_list[x], rms_list[x]))
    fig.savefig(savepath.format(ax.get_title()), dpi=200, bbox_inches='tight')
    fig.clf()
    plt.close()
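
A minimal invocation sketch for the routine above (hypothetical; it assumes data_parser, numpy as np, matplotlib, and the two CSV paths exist as in the snippet). The keyword defaults already carry the tuned KernelRidge hyperparameters, so only the save location is overridden:

from sklearn.kernel_ridge import KernelRidge

loalwr(model=KernelRidge(alpha=.00518, gamma=.518, kernel='laplacian'),
       savepath='../../plots/{}.png')  # the '{}' slot is filled with the plot title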
Code example #2
def execute(model, data, savepath, recursive=False, lwr_data_path = '../DBTT/CD_LWR_clean6.csv'):
    if not recursive:
        savepath = savepath.format(type(model).__name__+'_{}')
    ending = ''
    is_cd = False
    if 'CD' in data.y_feature:
        ending = ' on CD'
        is_cd = True
        lwr_data = dp.parse(lwr_data_path)
        lwr_data.set_x_features(data.x_features)
        lwr_data.set_y_feature('CD predicted delta sigma (Mpa)')

    groups, threshold = get_extrapolation_group(data, 'time')
    result = circle_test(model, data, savepath, groups)
    if is_cd:
        make_plot(threshold, result, savepath, 'log(time){}'.format(ending),
                  actual_rms=get_lwr_rmse(model, data, lwr_data))
    else:
        make_plot(threshold, result, savepath, 'log(time){}'.format(ending))

    groups, threshold = get_extrapolation_group(data, 'fluence')
    result = circle_test(model, data, savepath, groups)
    if is_cd:
        make_plot(threshold, result, savepath, 'log(fluence){}'.format(ending),
                  actual_rms=get_lwr_rmse(model, data, lwr_data))
    else:
        make_plot(threshold, result, savepath, 'log(fluence){}'.format(ending))
    if 'CD' not in data.y_feature:
        have_cd = data.set_y_feature('CD delta sigma')
        if have_cd:
            execute(model, data, savepath, recursive=True)
Code example #3
File: awo8.py Project: yarnaid/boxplot
def ajax_get_box_data():
    if request.method == 'POST':
        df = pd.read_json(os.path.join(app.root_path,
                                       './static/data/sample.json'))
        data = data_parser.parse(df)

        res = jsonify(result=data)
        return res
Code example #4
File: ExtrapolateToLWR.py Project: jeritgeorge/DBTT
def lwr(model=KernelRidge(alpha=.00139, gamma=.518, kernel='laplacian'),
        datapath="../../DBTT_Data.csv",
        lwr_datapath="../../CD_LWR_clean.csv",
        savepath='../../{}.png',
        X=[
            "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
            "N(log(fluence)", "N(Temp)", "N(log(flux)"
        ],
        Y=" CD delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    trainX = np.asarray(data.get_x_data())
    trainY = np.asarray(data.get_y_data()).ravel()

    lwr_data = data_parser.parse(lwr_datapath)
    lwr_data.set_y_feature(Y)
    lwr_data.set_x_features(X)

    testX = np.asarray(lwr_data.get_x_data())

    model.fit(trainX, trainY)
    Ypredict = model.predict(testX)
    rms = np.sqrt(mean_squared_error(Ypredict, lwr_data.get_y_data()))
    print("RMS: ", rms)

    plt.figure(1)
    plt.scatter(lwr_data.get_y_data(),
                Ypredict,
                s=10,
                color='black',
                label='IVAR')
    #plt.scatter(data.get_y_data().ravel(), model.predict(data.get_x_data()), s = 10, color = 'red')
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('CD Predicted (MPa)')
    plt.ylabel('Model Predicted (MPa)')
    plt.title('Extrapolate to LWR')
    plt.figtext(.15, .83, 'RMS: %.4f' % (rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()),
                dpi=200,
                bbox_inches='tight')
    plt.close()
Code example #5
File: server.py Project: natewhit44/Edge_Bundling
def graph():
    dataset = request.args.get("dataset")

    if dataset is None or dataset not in ["flight"]:
        return make_response(jsonify({"error": "Invalid dataset."})), 400

    #print dataset
    parsed = data_parser.parse(dataset)

    return make_response(jsonify(parsed)), 200
Code example #6
File: FullFit.py Project: JeritGeorge/DBTT
def fullfit(model=KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518, kernel='rbf', kernel_params=None),
            datapath="../../DBTT_Data.csv", savepath='../../{}.png',
            X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"],
            Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    Ydata = np.asarray(data.get_y_data()).ravel()
    #Ydata_norm = (Ydata - np.mean(Ydata)) / np.std(Ydata)

    # row indices for the IVAR (0-1463) and IVAR+ (1464-1505) subsets, assuming the
    # 1506-row data layout; np.arange avoids the duplicate indices that
    # np.linspace(1464, 1505, 43).astype(int) produced
    IVARindices = np.arange(0, 1464)
    IVARplusindices = np.arange(1464, 1506)

    model = model  # note: no-op; the estimator passed in is fit directly

    # Train the model using the training sets
    model.fit(data.get_x_data(), Ydata)

    Ypredict = model.predict(data.get_x_data())
    #Ypredict = Ypredict_norm * np.std(Ydata) + np.mean(Ydata)

    # calculate rms
    rms = np.sqrt(mean_squared_error(Ypredict, Ydata))
    IVAR_rms = np.sqrt(mean_squared_error(Ypredict[IVARindices], Ydata[IVARindices]))
    IVARplus_rms = np.sqrt(mean_squared_error(Ypredict[IVARplusindices], Ydata[IVARplusindices]))
    print('RMS: %.5f, IVAR RMS: %.5f, IVAR+ RMS: %.5f' % (rms, IVAR_rms, IVARplus_rms))

    # graph outputs
    plt.figure(1)
    plt.scatter(Ydata[IVARindices], Ypredict[IVARindices], s=10, color='black', label='IVAR')
    plt.legend(loc=4)
    plt.scatter(Ydata[IVARplusindices], Ypredict[IVARplusindices], s=10, color='red', label='IVAR+')
    plt.legend(loc=4)
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted (MPa)')
    plt.title('Full Fit')
    plt.figtext(.15, .83, 'Overall RMS: %.4f' % (rms), fontsize=14)
    plt.figtext(.15, .77, 'IVAR RMS: %.4f' % (IVAR_rms), fontsize=14)
    plt.figtext(.15, .71, 'IVAR+ RMS: %.4f' % (IVARplus_rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight')

    '''
    plt.figure(2)
    plt.scatter(Ydata, Ypredict-Ydata, s=10, color='black')
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted - Measured (MPa)')
    plt.title('Error vs Actual')
    plt.savefig(savepath.format("error_vs_actual"), dpi=200, bbox_inches='tight')'''

    plt.show()
    plt.close()
Code example #7
File: ModelA.py Project: vladkeel/NatLang1
 def test(self, train_file, test_file, f):
     train_data = prs.parse(train_file)
     self.train(train_data)
     test = prs.parse(test_file)
     cnf_matrix = np.zeros((len(self.set_of_tags), len(self.set_of_tags)))
     for i, sentence in enumerate(test, start=1):
         tags = [a[1] for a in sentence]
         tags_result = self.infer(sentence)
         for j in range(len(tags)):
             if tags[j] in self.set_of_tags and tags_result[
                     j] in self.set_of_tags:
                 cnf_matrix[self.tag_to_int[tags[j]]][self.tag_to_int[
                     tags_result[j]]] += 1
         progress_bar(
             i / len(test),
             " Inferring sentence: {} from: {}".format(i, len(test)))
     print()
     sum_good = sum(
         [cnf_matrix[i][i] for i in range(len(self.set_of_tags))])
     sum_all = cnf_matrix.sum()
     result_accuracy = sum_good / sum_all
     print("Confusion matrix:", file=f)
     title = ' '.join(self.int_to_tag[x]
                      for x in range(len(self.set_of_tags)))
     print(title, file=f)
     str_mat = '\n'.join(' '.join('%0.0f' % x for x in y)
                         for y in cnf_matrix)
     print(str_mat, file=f)
     print("Accuracy: {}".format(result_accuracy), file=f)
     work_mat = cnf_matrix
     for i in range(len(self.set_of_tags)):
         work_mat[i][i] = 0
     flat_indices = np.argpartition(work_mat.ravel(), -10)[-10:]
     row_indices, col_indices = np.unravel_index(flat_indices,
                                                 work_mat.shape)
     for i in range(10):
         print("{} was mistaken for {} - {} times".format(
             self.int_to_tag[row_indices[i]],
             self.int_to_tag[col_indices[i]],
             work_mat[row_indices[i]][col_indices[i]]),
               file=f)
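
The top-10 confusion pairs above are extracted with np.argpartition; a minimal standalone illustration of that idiom:

import numpy as np

mat = np.array([[0, 5, 1],
                [2, 0, 7],
                [3, 4, 0]])
# indices of the 2 largest entries of the flattened matrix (order not guaranteed)
flat_indices = np.argpartition(mat.ravel(), -2)[-2:]
rows, cols = np.unravel_index(flat_indices, mat.shape)
for r, c in zip(rows, cols):
    print(r, c, mat[r, c])  # prints the (1, 2) -> 7 and (0, 1) -> 5 pairs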
Code example #8
File: ErrorBias.py Project: jeritgeorge/DBTT
def errbias(model=KernelRidge(alpha=.00139,
                              coef0=1,
                              degree=3,
                              gamma=.518,
                              kernel='rbf',
                              kernel_params=None),
            datapath="../../DBTT_Data.csv",
            savepath='../../{}.png',
            X=[
                "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
                "N(log(fluence)", "N(log(flux)", "N(Temp)"
            ],
            Y="delta sigma"):
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    descriptors = [
        'Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)', 'Si (At%)', 'C (At%)',
        'Temp (C)', 'log(fluence)', 'log(flux)'
    ]
    xlist = np.asarray(data.get_data(descriptors))

    model = model  # note: no-op; the estimator passed in is fit directly
    model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
    error = model.predict(data.get_x_data()) - np.asarray(
        data.get_y_data()).ravel()

    for x in range(len(descriptors)):
        plt.scatter(xlist[:, x], error, color='black', s=10)
        xlim = plt.gca().get_xlim()
        plt.plot(xlim, (20, 20), ls="--", c=".3")
        plt.plot(xlim, (0, 0), ls="--", c=".3")
        plt.plot(xlim, (-20, -20), ls="--", c=".3")
        m, b = np.polyfit(np.reshape(xlist[:, x], len(xlist[:, x])),
                          np.reshape(error, len(error)), 1)  # line of best fit

        matplotlib.rcParams.update({'font.size': 15})
        plt.plot(xlist[:, x], m * xlist[:, x] + b, color='red')
        plt.figtext(.15,
                    .83,
                    'y = ' + "{0:.6f}".format(m) + 'x + ' +
                    "{0:.5f}".format(b),
                    fontsize=14)
        plt.title('Error vs. {}'.format(descriptors[x]))
        plt.xlabel(descriptors[x])
        plt.ylabel('Predicted - Actual (MPa)')
        plt.savefig(savepath.format(plt.gca().get_title()),
                    dpi=200,
                    bbox_inches='tight')
        plt.show()
        plt.close()
Code example #9
File: grafo.py Project: rodrigondec/grafo
    def __init__(self, file_path):
        """
        Inicializa os valores do Grafo criado. Realiza o processamento do arquivo passado.
        Args:
            file_path: local do arquivo.
        """
        self.file_path = file_path

        parse(file_path)

        self.vertices = Vertice.instances

        self._cores = set(Horario.instances.values())

        self._turmas = list(Turma.instances.values())
        self._turmas.sort(key=lambda _turma: len(_turma.vertices))

        self.context = {
            "turma": None,
            "cores_possiveis_turma": set(),
            "vertice": None
        }
Code example #10
def AlloyClustering(k):
    alloy_data = data_parser.parse("../../AlloyComps.csv")
    data = np.asarray(alloy_data.get_data(["Cu","Ni","Mn","P","Si","C"]))
    # note: k only parameterizes the commented-out estimators below;
    # AffinityPropagation chooses the number of clusters itself
    #est = KMeans(n_clusters=k)
    #est = AgglomerativeClustering(n_clusters = k)
    est = AffinityPropagation()
    est.fit(data)

    labels = est.labels_
    '''print(len(labels))
    for i in range(k):
        print("Cluster #{}".format(i))
        print(np.asarray(alloy_data.get_data("Alloy"))[np.where(labels == i)])
        print()'''

    return (labels,alloy_data)
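
A usage sketch for the helper above; note that k only affects the commented-out estimators, since AffinityPropagation selects the number of clusters itself:

labels, alloy_data = AlloyClustering(k=5)  # k is effectively ignored here
print("{} clusters found".format(len(set(labels))))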
Code example #11
def AlloyClustering(k):
    alloy_data = data_parser.parse("../../AlloyComps.csv")
    data = np.asarray(alloy_data.get_data(["Cu", "Ni", "Mn", "P", "Si", "C"]))
    # note: k only parameterizes the commented-out estimators below;
    # AffinityPropagation chooses the number of clusters itself
    #est = KMeans(n_clusters=k)
    #est = AgglomerativeClustering(n_clusters = k)
    est = AffinityPropagation()
    est.fit(data)

    labels = est.labels_
    '''print(len(labels))
    for i in range(k):
        print("Cluster #{}".format(i))
        print(np.asarray(alloy_data.get_data("Alloy"))[np.where(labels == i)])
        print()'''

    return (labels, alloy_data)
Code example #12
def train(model):
    p.parse()

    if model == "logistic regression":
        answer = lr.pure_logreg()
        p.save_answer(answer)
Code example #13
def loacv(model=KernelRidge(alpha=.00518,
                            coef0=1,
                            degree=3,
                            gamma=.518,
                            kernel='laplacian',
                            kernel_params=None),
          datapath="../../DBTT_Data.csv",
          savepath='../../{}.png',
          X=[
              "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
              "N(log(fluence)", "N(log(flux)", "N(Temp)"
          ],
          Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    rms_list = []
    alloy_list = []

    for alloy in range(1, 60):
        model = model  # note: this is a no-op, not a fresh copy; the same estimator is refit each iteration

        # fit model to all alloys except the one to be removed
        data.remove_all_filters()
        data.add_exclusive_filter("Alloy", '=', alloy)
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        # predict removed alloy
        data.remove_all_filters()
        data.add_inclusive_filter("Alloy", '=', alloy)
        if len(data.get_x_data()) == 0:
            continue  # skip alloys absent from the data set (x data is empty)
        Ypredict = model.predict(data.get_x_data())

        rms = np.sqrt(
            mean_squared_error(Ypredict,
                               np.asarray(data.get_y_data()).ravel()))
        rms_list.append(rms)
        alloy_list.append(alloy)

    print('Mean RMSE: ', np.mean(rms_list))

    # graph rmse vs alloy
    fig, ax = plt.subplots(figsize=(10, 4))
    plt.xticks(np.arange(0, max(alloy_list) + 1, 5))
    ax.scatter(alloy_list, rms_list, color='black', s=10)
    ax.plot((0, 59), (0, 0), ls="--", c=".3")
    ax.set_xlabel('Alloy Number')
    ax.set_ylabel('RMSE (MPa)')
    ax.set_title('Leave out Alloy')
    ax.text(.05,
            .88,
            'Mean RMSE: {:.2f}'.format(np.mean(rms_list)),
            fontsize=14,
            transform=ax.transAxes)
    for x in np.argsort(rms_list)[-5:]:
        ax.annotate(s=alloy_list[x], xy=(alloy_list[x], rms_list[x]))
    fig.savefig(savepath.format(ax.get_title()), dpi=200, bbox_inches='tight')
    fig.clf()
    plt.close()
Code example #14
def show_user_interface(window, user_choice):
    curr_spectrum = 0
    spectra = []
    plot_final = None
    final_compounds_list = ''
    prediction = ''
    confidence = ''
    while True:  # Event Loop
        main_event, main_values = window.Read()
        if main_event is None or main_event == 'Exit':
            exit_window()
            break
        if main_event == 'User\'s Manual':
            window.SetAlpha(0.92)
            user_manual()
            window.SetAlpha(1)
            continue

        # Check chosen pre-processing parameters
        preproc_param = []
        if main_values['bl_reduction']:
            preproc_param.append('bl_reduction')
        if main_values['smoothing']:
            preproc_param.append('smoothing')
        if main_values['sfs']:
            preproc_param.append('sfs')
        if main_values['min_max']:
            preproc_param.append('min_max')
        if main_values['z_score']:
            preproc_param.append('z_score')
        if main_values['data_reduction']:
            preproc_param.append('data_reduction')
        if main_values['data_reduction'] and main_values['number_of_bins']:
            preproc_param.append('number_of_bins')
            preproc_param.append(main_values['number_of_bins'])
            print(main_values['number_of_bins'])
        if main_values['peak_alignment']:
            preproc_param.append('peak_alignment')

        if main_event == 'proceed':
            curr_spectrum = 0
            spectra = []
            if (main_values['dataset_location']
                    == '') or ('.mzML' not in main_values['dataset_location']):
                sg.PopupTimed('Invalid Input!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
            elif not main_values['data_reduction'] and main_values[
                    'number_of_bins']:
                sg.PopupTimed('Binning not enabled!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
            elif '.' in main_values['number_of_bins']:
                sg.PopupTimed('Please enter an integer!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
            else:
                # Get dataset location and parse the data
                dataset_location = main_values['dataset_location']
                parsed_spectra = data_parser.parse(dataset_location)

                # Pre-process MS Data
                spectra, used_pa, dupli_exists = preprocessing.get_preprocessed_data(
                    parsed_spectra, preproc_param)

                # Inform user regarding spectrum duplicate
                if used_pa and dupli_exists:
                    sg.PopupTimed(
                        'Duplicate spectrum found. Spectrum is removed.',
                        background_color='#DEDEDE',
                        font='Roboto 10',
                        no_titlebar=False)
                elif used_pa and not dupli_exists:
                    sg.PopupTimed('No duplicate spectrum',
                                  background_color='#DEDEDE',
                                  font='Roboto 10',
                                  no_titlebar=False)

                # Display MS plot
                plot_figure = plot.plot_spectrum(spectra[0][0], spectra[0][1])
                plot_final = plot.draw_figure(
                    window.FindElement('plot_canvas').TKCanvas, plot_figure)

                # Display MS numerical data
                window.FindElement('ms_data_table').Update(
                    make_table(spectra[0][0], spectra[0][1],
                               spectra[0][2])[1:])

                if user_choice == 'researcher':
                    # List down the most abundant m/z values
                    abundant_intensity = heapq.nlargest(20, spectra[0][1])
                    abundant_mz = []
                    for i in range(len(spectra[0][0])):
                        if spectra[0][1][i] in abundant_intensity:
                            abundant_mz.append(spectra[0][0][i])
                    final_mz_list = []
                    for i in abundant_mz:
                        final_mz_list.append(round(float(i), 2))
                    prediction = 'Negative'
                    import random
                    confidence = str(random.randint(52, 96)) + '%'

                    compound_list = chemCompoundsDB.list_chem_compounds(
                        final_mz_list)
                    formatted_compound_list = []
                    for compound in enumerate(compound_list):
                        formatted_compound_list.append(compound[1][0])
                    formatted_compound_list = list(
                        dict.fromkeys(formatted_compound_list))
                    formatted_compound_list = '- ' + '\n\n- '.join(
                        formatted_compound_list)
                    window.FindElement('chem_compounds').Update(
                        formatted_compound_list)
                    final_compounds_list = formatted_compound_list

                    # Get prediction values
                    window.FindElement('prediction').Update(prediction)
                    window.FindElement('prediction_confidence').Update(
                        confidence)

                    sg.PopupTimed('Processing Finished!',
                                  background_color='#DEDEDE',
                                  font='Roboto 10',
                                  no_titlebar=False)

                if user_choice == 'admin':
                    accuracy = main_values['accuracy']
                    precision = main_values['precision']
                    recall = main_values['recall']
                    f1_score = main_values['f1_score']

        if main_event == 'start_model':
            # note: classifier is only bound here; 'save_model' below assumes this ran first
            classifier, accuracy, precision, recall, f1_score = admin_models.train_test_model(
                spectra)
            sg.PopupTimed('Model Finished!',
                          background_color='#DEDEDE',
                          font='Roboto 10',
                          no_titlebar=False)

            window.FindElement('accuracy').Update(accuracy)
            window.FindElement('precision').Update(precision)
            window.FindElement('recall').Update(recall)
            window.FindElement('f1_score').Update(f1_score)

        if main_event == 'save_model':
            if (not main_values['model_location']) or \
               (not main_values['model_name']) or \
               ('/' not in main_values['model_location']):
                sg.PopupTimed('Invalid Input!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
            else:
                model_location = main_values['model_location']
                model_name = main_values['model_name']
                admin_models.save_model(classifier, model_location, model_name)
                sg.PopupTimed('Model Saved!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)

        # Spectra navigation
        if spectra and (main_event == 'ms_number_go') and (main_values['ms_number']) \
                and (0 < int(main_values['ms_number']) <= len(spectra)):
            curr_spectrum = int(main_values['ms_number']) - 1
            display_ms_data(spectra[curr_spectrum])
        if spectra and (main_event
                        == 'spectrum_prev') and (curr_spectrum != 0):
            curr_spectrum -= 1
            display_ms_data(spectra[curr_spectrum])
        if spectra and (main_event == 'spectrum_next') and (curr_spectrum !=
                                                            len(spectra) - 1):
            curr_spectrum += 1
            display_ms_data(spectra[curr_spectrum])

        # note: redefined on each pass through the event loop; spectra is empty on the
        # first pass, so the navigation handlers above never call it before it exists
        def display_ms_data(spectrum):
            plot_figure = plot.plot_spectrum(spectrum[0], spectrum[1])
            plot_final = plot.draw_figure(
                window.FindElement('plot_canvas').TKCanvas, plot_figure)
            window.FindElement('ms_data_table').Update(
                make_table(spectrum[0], spectrum[1], spectrum[2])[1:])

            if user_choice == 'researcher':
                abundant_intensity = heapq.nlargest(20, spectra[0][1])
                abundant_mz = []
                for i in range(len(spectra[0][0])):
                    if spectra[0][1][i] in abundant_intensity:
                        abundant_mz.append(spectra[0][0][i])
                final_mz_list = []
                for i in abundant_mz:
                    final_mz_list.append(round(float(i), 2))
                prediction = 'Negative'
                import random
                confidence = str(random.randint(52, 96)) + '%'

                compound_list = chemCompoundsDB.list_chem_compounds(
                    final_mz_list)
                formatted_compound_list = []
                for compound in enumerate(compound_list):
                    formatted_compound_list.append(compound[1][0])
                formatted_compound_list = list(
                    dict.fromkeys(formatted_compound_list))
                formatted_compound_list = '- ' + '\n\n- '.join(
                    formatted_compound_list)
                window.FindElement('chem_compounds').Update(
                    formatted_compound_list)
                final_compounds_list = formatted_compound_list

                window.FindElement('prediction').Update(prediction)
                window.FindElement('prediction_confidence').Update(confidence)

                sg.PopupTimed('Processing Finished!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)

        if main_event == 'reset':
            curr_spectrum = 0
            spectra = []
            window.FindElement('dataset_location').Update('')
            window.FindElement('bl_reduction').Update(value=False)
            window.FindElement('smoothing').Update(value=False)
            window.FindElement('sfs').Update(value=False)
            window.FindElement('min_max').Update(value=False)
            window.FindElement('z_score').Update(value=False)
            window.FindElement('data_reduction').Update(value=False)
            window.FindElement('peak_alignment').Update(value=False)
            window.FindElement('number_of_bins').Update(value='')
            window.FindElement('plot_canvas').TKCanvas.delete('all')
            window.FindElement('ms_data_table').Update('')

            if user_choice == 'researcher':
                window.FindElement('chem_compounds').Update(value='')
                window.FindElement('prediction').Update(value='')
                window.FindElement('prediction_confidence').Update(value='')
                window.FindElement('export_location').Update(value='')
                window.FindElement('export_name').Update(value='')
                window.FindElement('ms_number').Update(value='')

            if user_choice == 'admin':
                window.FindElement('model_name').Update(value='')
                window.FindElement('model_location').Update(value='')
                window.FindElement('accuracy').Update(value='')
                window.FindElement('precision').Update(value='')
                window.FindElement('recall').Update(value='')
                window.FindElement('f1_score').Update(value='')

            continue

        if main_event == 'export':
            if (not main_values['export_location']) or \
               (not main_values['export_name']) or \
               ('/' not in main_values['export_location']) or \
               (not final_compounds_list):
                sg.PopupTimed('Invalid Input!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
            else:
                if '.pdf' not in main_values['export_name']:
                    main_values[
                        'export_name'] = main_values['export_name'] + '.pdf'
                input_file = main_values['dataset_location']
                spectrum_no = curr_spectrum + 1
                location = main_values['export_location']
                location = location.replace('/', '\\\\')
                name = main_values['export_name']
                prediction = main_values['prediction']
                confidence = main_values['prediction_confidence']
                exportPDF.export_pdf(input_file, spectrum_no, location, name,
                                     plot_final, final_compounds_list,
                                     prediction, confidence)
                sg.PopupTimed('PDF Export Finished!',
                              background_color='#DEDEDE',
                              font='Roboto 10',
                              no_titlebar=False)
    window.Close()
Code example #15
                parameter_values.append(config.getboolean('AllTests', parameter))
        else:
            if config.has_option(case_name, parameter):
                parameter_values.append(config.get(case_name, parameter))
            else:
                parameter_values.append(config.get('AllTests', parameter))

    model, data_path, save_path, y_data, x_data, lwr_data_path, weights = parameter_values

    if "CD" in y_data or "EONY" in y_data:
        save_path = save_path.format(y_data.split(' ',1)[0] + '_{}')

    model = importlib.import_module(model).get()
    x_data = x_data.split(',')

    data = data_parser.parse(data_path, weights)
    data.set_x_features(x_data)
    data.set_y_feature(y_data)

    data.add_exclusive_filter("Temp (C)", '<>', 290)
    data.overwrite_data_w_filtered_data()

    lwr_data = data_parser.parse(lwr_data_path)
    if not y_data == "delta sigma":
        lwr_data.set_x_features(x_data)
        lwr_data.set_y_feature(y_data)

    if y_data == "CD delta sigma":
        data.add_exclusive_filter("Alloy",'=', 29)
        data.add_exclusive_filter("Alloy",'=', 8)
        data.add_exclusive_filter("Alloy", '=', 1)
Code example #16
from data_parser import parse

if __name__ == "__main__":
    x = parse("/media/asdazey/PSCSTA/Judge/icecreamcatch.in", True)
    #x=parse("/home/asdazey/Desktop/PSCSTA/logan/icc/icecreamcatch.in",True)
    #print(x)

    n = int(x[0][0])
    x = x[1:]

    for i in range(n):  #all
        #print(x)
        s = int(x[0][0])
        x = x[1:]

        # track the smallest start and largest end over the s intervals
        # (renamed from min/max to avoid shadowing the builtins; hi starts
        # from the first interval's end rather than its start)
        lo = int(x[0][0])
        hi = int(x[0][1])
        for j in range(s):  #for all tests
            #print(x[j])

            if int(x[j][0]) < lo:
                lo = int(x[j][0])

            if int(x[j][1]) > hi:
                hi = int(x[j][1])

        print(lo, hi, (hi - lo + 1))

        x = x[s:]
Code example #17
from ComplexModel import ComplexModel, complex_feature_extractor
from SimpleModel import SimpleModel, simple_feature_extractor
from data_parser import parse
import pickle

# Simple model comp
with open('data/comp_m1_302575287.wtag', 'w') as f:
    comp_data = parse('data/comp.unlabeled')
    w = pickle.load(open('w_pickle/w_simple_100', 'rb'))
    simple_model = SimpleModel(comp_data, simple_feature_extractor, w)
    for sentence in comp_data:
        result = simple_model.infer(sentence)
        for word in result:
            f.write("{}\t{}\t_\t{}\t_\t_\t{}\t_\t_\t_\n".format(
                word.counter, word.token, word.pos, word.head))
        f.write("\n")

# Complex model comp
with open('data/comp_m2_302575287.wtag', 'w') as f:
    comp_data = parse('data/comp.unlabeled')
    w = pickle.load(open('w_pickle/w_complex_50', 'rb'))
    complex_model = ComplexModel(comp_data, complex_feature_extractor, w)
    for sentence in comp_data:
        result = complex_model.infer(sentence)
        for word in result:
            f.write("{}\t{}\t_\t{}\t_\t_\t{}\t_\t_\t_\n".format(
                word.counter, word.token, word.pos, word.head))
        f.write("\n")
Code example #18
def desimp(model=KernelRidge(alpha=.00139, gamma=.518, kernel='rbf'),
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=[
               "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
               "N(log(fluence)", "N(log(flux)", "N(Temp)"
           ],
           Y="delta sigma"):
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    overall_rms_list = []
    sd_list = []
    descriptorlist = ['Cu', 'Ni', 'Mn', 'P', 'Si', 'C', 'Fl', 'Fx', 'Temp']

    numFolds = 5
    numIter = 200
    model = model  # note: no-op; the estimator passed in is used directly

    Xdata = np.asarray(data.get_x_data())
    Ydata = np.asarray(data.get_y_data()).ravel()

    print("Testing descriptor importance using {}x {} - Fold CV".format(
        numIter, numFolds))
    print("")

    for x in range(len(data.get_x_data()[0])):
        RMS_List = []
        newX = np.delete(Xdata, x, 1)
        for n in range(numIter):
            kf = cross_validation.KFold(len(Xdata),
                                        n_folds=numFolds,
                                        shuffle=True)
            K_fold_rms_list = []
            # split into testing and training sets
            for train_index, test_index in kf:
                X_train, X_test = newX[train_index], newX[test_index]
                Y_train, Y_test = Ydata[train_index], Ydata[test_index]
                # train on training sets
                model.fit(X_train, Y_train)
                YTP = model.predict(X_test)
                rms = np.sqrt(mean_squared_error(Y_test, YTP))
                K_fold_rms_list.append(rms)
            RMS_List.append(np.mean(K_fold_rms_list))
            # calculate rms

        maxRMS = np.amax(RMS_List)
        minRMS = np.amin(RMS_List)
        avgRMS = np.mean(RMS_List)
        medRMS = np.median(RMS_List)
        sd = np.sqrt(np.mean((RMS_List - np.mean(RMS_List))**2))

        print("Removing {}:".format(descriptorlist[x]))
        print("The average RMSE was " + str(avgRMS))
        print("The median RMSE was " + str(medRMS))
        print("The max RMSE was " + str(maxRMS))
        print("The min RMSE was " + str(minRMS))
        print("The std deviation of the RMSE values was " + str(sd))
        print("")

        overall_rms_list.append(avgRMS)
        sd_list.append(sd)

    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots()
    rects = ax.bar(np.arange(9), overall_rms_list, color='r', yerr=sd_list)
    ax.set_xlabel('Descriptor Removed')
    ax.set_ylabel('200x 5-fold RMSE')
    ax.set_title('Descriptor Importance')
    ax.set_xticks(np.arange(9) + .4)
    ax.set_xticklabels(descriptorlist)

    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2.,
                1.05 * height,
                '%.2f' % (height),
                ha='center',
                va='bottom')

    fig.savefig(savepath.format(plt.gca().get_title()),
                dpi=200,
                bbox_inches='tight')
    plt.show()
    plt.close()
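
sklearn.cross_validation was removed in scikit-learn 0.20; a minimal sketch of the same inner CV loop under the replacement sklearn.model_selection API (assuming the same model, newX, Ydata, numFolds, np, and mean_squared_error names as above):

from sklearn.model_selection import KFold

kf = KFold(n_splits=numFolds, shuffle=True)
K_fold_rms_list = []
for train_index, test_index in kf.split(newX):
    X_train, X_test = newX[train_index], newX[test_index]
    Y_train, Y_test = Ydata[train_index], Ydata[test_index]
    model.fit(X_train, Y_train)
    K_fold_rms_list.append(np.sqrt(mean_squared_error(Y_test, model.predict(X_test))))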
Code example #19
File: FeatureSelector2.py Project: vladkeel/NatLang2
from data_parser import parse
from ComplexModel import ComplexModel, complex_feature_extractor_t
import colorama
colorama.init()

if __name__ == '__main__':
    n = 5
    all_data = parse('data/train.labeled')
    test_data = parse('data/test.labeled')
    fname = 'results/compare_features'
    with open(fname, 'w') as f:
        f.write('Feat#\tTest acc\n')
        for i in range(29, 41):
            global_test = i
            model = ComplexModel(all_data,
                                 complex_feature_extractor_t,
                                 special_feature=i)
            model.train(n)
            res = model.test(test_data)
            f.write('{0}\t{1:8.5f}\n'.format(global_test, res))
Code example #20
def flfxex(model=KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518, kernel='rbf', kernel_params=None),
            datapath="../../DBTT_Data.csv", savepath='../../{}.png',
            X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"],
            Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    fluence_divisions = [3.3E18, 3.3E19, 3.3E20]
    flux_divisions = [5e11, 2e11, 1e11]

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(fluence_divisions)):
        model = model  # note: no-op; the same estimator is refit for each division
        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '<', fluence_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '>=', fluence_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(mean_squared_error(Ypredict, np.asarray(data.get_y_data()).ravel()))

        matplotlib.rcParams.update({'font.size': 26})
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (MPa)')
        ax[x].set_ylabel('Predicted ∆sigma (MPa)')
        ax[x].set_title('Testing Fluence > {}'.format(fluence_divisions[x]))
        ax[x].text(.1, .88, 'RMSE: {:.3f}'.format(RMSE), fontsize=30, transform=ax[x].transAxes)
        ax[x].text(.1, .83, 'Train: {}, Test: {}'.format(l_train, l_test), transform=ax[x].transAxes)

    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("fluence_extrapolation"), dpi=150, bbox_inches='tight')
    plt.show()
    plt.close()

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(flux_divisions)):
        model = model  # note: no-op; the same estimator is refit for each division
        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '>', flux_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '<=', flux_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(mean_squared_error(Ypredict, np.asarray(data.get_y_data()).ravel()))

        matplotlib.rcParams.update({'font.size': 26})
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (MPa)')
        ax[x].set_ylabel('Predicted ∆sigma (MPa)')
        ax[x].set_title('Testing Flux < {:.0e}'.format(flux_divisions[x]))
        ax[x].text(.1, .88, 'RMSE: {:.3f}'.format(RMSE), fontsize=30, transform=ax[x].transAxes)
        ax[x].text(.1, .83, 'Train: {}, Test: {}'.format(l_train, l_test), transform=ax[x].transAxes)

    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("flux_extrapolation"), dpi=150, bbox_inches='tight')
    plt.show()
    plt.close()
Code example #21
File: __init__.py Project: ichabodcole/JSON2Blender
import json
import os
import sys

import bpy  # Blender Python API; this script is assumed to run inside Blender

# make the .blend file's directory importable so data_parser can be found
directory = os.path.dirname(bpy.data.filepath)
if directory not in sys.path:
    sys.path.append(directory)

import data_parser

filename = "AE2JSON_Comp1.json"
full_path = os.path.join(directory, filename)
with open(full_path) as f:
    json_data = json.load(f)

# DataParser is assumed to live in the data_parser module; binding the instance to
# a new name avoids shadowing the module (the original reused the name data_parser)
parser = data_parser.DataParser(json_data)
parsed_data = parser.parse()

#data_parser = AEProjectParser(json_data)
#data_parser.add_layer_parser('Null', NullLayerParser)
#data_parser.add_layer_parser('Camera', CameraLayerParser)
#parsed_data = data_parser.parse()
#import pprint
#pp = pprint.PrettyPrinter(indent = 4, depth = 5, width=120)
#pp.pprint(parsed_data)

class Main(object):
  def __init__(self, data_parser):
    self.data_parser = data_parser

  def run(self):
    self.data_parser.parse()
Code example #22
from string import ascii_letters

from data_parser import parse  # assumed import; parse() is called in the main block below


def getone(s):
    """Return the run of ASCII letters immediately following the first '#' in s."""
    try:
        n = s.index("#")
        after = s[n + 1:]
    except ValueError:  # no '#' in the string
        return ""

    ret = ""
    for i in after:
        # the original tested membership in an undefined name 'ascii';
        # string.ascii_letters is assumed to be the intended alphabet
        if i in ascii_letters:
            ret += i
        else:
            return ret
    return ret

if __name__=="__main__":
    x=parse("/media/asdazey/PSCSTA/Judge/hashtags.in")
    #x=parse("/home/asdazey/Desktop/PSCSTA/logan/ht/hashtags.in")
    #print(x)

    n=int(x[0])
    x=x[1:]

    for i in range(n):
        m = []
        for j in x[i].split(" "):
            k = getone(j)
            if k!= "":
                m.append(k)
        print(" ".join(m))
Code example #23
File: ScaleFactors.py Project: jeritgeorge/DBTT
    for i in range(len(Xdata[0])):
        rms_list = []
        fig, ax = plt.subplots()
        for j in np.arange(0, 5.5, .5):
            newX = np.copy(Xdata)
            newX[:, i] = newX[:, i] * j
            kfold = kfold_cv(model, X = newX, Y = Ydata, num_folds = 5, num_runs = 200)
            alloy = alloy_cv(model, newX, Ydata, Alloys)
            #ax.errorbar(j,kfold['rms'],yerr = kfold['std'], c = 'red', label = '5-fold CV', fmt='o')
            #ax.errorbar(j, alloy['rms'], yerr = alloy['std'], c = 'blue', label = 'Alloy CV', fmt='o')
            ax.errorbar(j, kfold['rms'] + alloy['rms'], yerr = kfold['std'] + alloy['std'], c = 'm', fmt='o')
            print(i, j, kfold['rms'], alloy['rms'], kfold['rms'] + alloy['rms'])
        ax.set_xlabel("Scale Factor")
        ax.set_ylabel("RMSE")
        ax.set_title(X[i])
        fig.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight')
        fig.clf()
        plt.close()

from sklearn.kernel_ridge import KernelRidge
model = KernelRidge(alpha=.00518, gamma=.518, kernel='laplacian')
X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)","N(Si)", "N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"]
Y="delta sigma"
datapath="../../DBTT_Data.csv"
savepath='../../bardeengraphs/{}.png'
data = data_parser.parse(datapath)
data.set_x_features(X)
data.set_y_feature(Y)

execute(model, data, savepath)
Code example #24
__author__ = 'haotian'

import data_parser as dp

data = dp.parse('../DBTT/DBTT_Data14.5.csv')
data.normalization(
    ['Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)', 'Si (At%)', 'C (At%)'],
    normalization_type='t')
data.normalization(
    ['log(fluence)', 'log(eff fluence)', 'log(flux)', 'Temp (C)', 'log(time)'])
data.std_normalization(['delta sigma', 'EONY predicted', 'CD predicted (Mpa)'])
data.output('../DBTT/DBTT_Data15.csv')
Code example #25
File: train.py Project: Pangyk/arousal_valence
    _a_acc = metrics.accuracy_score(y_test_a, pred_a)
    _a_f1 = metrics.f1_score(y_test_a, pred_a, pos_label=1)
    model_a.save_weights(p.path_a)

    model_v.compile(optimizer=keras.optimizers.Adam(lr=p.lr2), loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                    metrics=['accuracy'])
    model_v.fit(x_train, y_train_v, epochs=epochs, batch_size=p.batch_size)
    pred_v = tf.argmax(model_v.predict(x_test, batch_size=p.total - p.train), 1)
    _v_acc = metrics.accuracy_score(y_test_v, pred_v)
    _v_f1 = metrics.f1_score(y_test_v, pred_v, pos_label=1)
    model_v.save_weights(p.path_v)

    return _a_acc, _a_f1, _v_acc, _v_f1


if __name__ == "__main__":
    features = dp.parse("resources/deep_features.txt", p.total)
    arousal_class = dp.parse("resources/arousal_class.txt", p.total) - 1
    valence_class = dp.parse("resources/valence_class.txt", p.total) - 1
    avg_a_acc, avg_a_f1, avg_v_acc, avg_v_f1 = 0, 0, 0, 0
    for i in range(p.repeat):
        a_acc, a_f1, v_acc, v_f1 = train(features, arousal_class, valence_class, p.epochs)
        avg_a_acc += a_acc
        avg_a_f1 += a_f1
        avg_v_acc += v_acc
        avg_v_f1 += v_f1
    print("Arousal result: average accuracy is " + str(avg_a_acc / p.repeat) + ", average F1 score is " + str(
        avg_a_f1 / p.repeat))
    print("Valence result: average accuracy is " + str(avg_v_acc / p.repeat) + ", average F1 score is " + str(
        avg_v_f1 / p.repeat))
Code example #26
def fullfit(model=KernelRidge(alpha=.00139,
                              coef0=1,
                              degree=3,
                              gamma=.518,
                              kernel='rbf',
                              kernel_params=None),
            datapath="../../DBTT_Data.csv",
            savepath='../../{}.png',
            X=[
                "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
                "N(log(fluence)", "N(log(flux)", "N(Temp)"
            ],
            Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    Ydata = np.asarray(data.get_y_data()).ravel()
    #Ydata_norm = (Ydata - np.mean(Ydata)) / np.std(Ydata)

    # row indices for the IVAR (0-1463) and IVAR+ (1464-1505) subsets, assuming the
    # 1506-row data layout; np.arange avoids the duplicate indices that
    # np.linspace(1464, 1505, 43).astype(int) produced
    IVARindices = np.arange(0, 1464)
    IVARplusindices = np.arange(1464, 1506)

    model = model  # note: no-op; the estimator passed in is fit directly

    # Train the model using the training sets
    model.fit(data.get_x_data(), Ydata)

    Ypredict = model.predict(data.get_x_data())
    #Ypredict = Ypredict_norm * np.std(Ydata) + np.mean(Ydata)

    # calculate rms
    rms = np.sqrt(mean_squared_error(Ypredict, Ydata))
    IVAR_rms = np.sqrt(
        mean_squared_error(Ypredict[IVARindices], Ydata[IVARindices]))
    IVARplus_rms = np.sqrt(
        mean_squared_error(Ypredict[IVARplusindices], Ydata[IVARplusindices]))
    print('RMS: %.5f, IVAR RMS: %.5f, IVAR+ RMS: %.5f' %
          (rms, IVAR_rms, IVARplus_rms))

    # graph outputs
    plt.figure(1)
    plt.scatter(Ydata[IVARindices],
                Ypredict[IVARindices],
                s=10,
                color='black',
                label='IVAR')
    plt.legend(loc=4)
    plt.scatter(Ydata[IVARplusindices],
                Ypredict[IVARplusindices],
                s=10,
                color='red',
                label='IVAR+')
    plt.legend(loc=4)
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted (MPa)')
    plt.title('Full Fit')
    plt.figtext(.15, .83, 'Overall RMS: %.4f' % (rms), fontsize=14)
    plt.figtext(.15, .77, 'IVAR RMS: %.4f' % (IVAR_rms), fontsize=14)
    plt.figtext(.15, .71, 'IVAR+ RMS: %.4f' % (IVARplus_rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()),
                dpi=200,
                bbox_inches='tight')
    '''
    plt.figure(2)
    plt.scatter(Ydata, Ypredict-Ydata, s=10, color='black')
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted - Measured (MPa)')
    plt.title('Error vs Actual')
    plt.savefig(savepath.format("error_vs_actual"), dpi=200, bbox_inches='tight')'''

    plt.show()
    plt.close()
Code example #27
def desimp(model=KernelRidge(alpha=.00139, gamma=.518, kernel='rbf'),
           datapath="../../DBTT_Data.csv", savepath='../../{}.png',
           X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)","N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"], Y = "delta sigma"):
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    overall_rms_list = []
    sd_list = []
    descriptorlist = ['Cu', 'Ni', 'Mn', 'P', 'Si', 'C', 'Fl', 'Fx', 'Temp']

    numFolds = 5
    numIter = 200
    model = model  # note: no-op; the estimator passed in is used directly

    Xdata = np.asarray(data.get_x_data())
    Ydata = np.asarray(data.get_y_data()).ravel()

    print("Testing descriptor importance using {}x {} - Fold CV".format(numIter, numFolds))
    print("")

    for x in range(len(data.get_x_data()[0])):
        RMS_List = []
        newX = np.delete(Xdata, x, 1)
        for n in range(numIter):
            kf = cross_validation.KFold(len(Xdata), n_folds=numFolds, shuffle=True)
            K_fold_rms_list = []
            # split into testing and training sets
            for train_index, test_index in kf:
                X_train, X_test = newX[train_index], newX[test_index]
                Y_train, Y_test = Ydata[train_index], Ydata[test_index]
                # train on training sets
                model.fit(X_train, Y_train)
                YTP = model.predict(X_test)
                rms = np.sqrt(mean_squared_error(Y_test, YTP))
                K_fold_rms_list.append(rms)
            RMS_List.append(np.mean(K_fold_rms_list))
            # calculate rms

        maxRMS = np.amax(RMS_List)
        minRMS = np.amin(RMS_List)
        avgRMS = np.mean(RMS_List)
        medRMS = np.median(RMS_List)
        sd = np.sqrt(np.mean((RMS_List - np.mean(RMS_List)) ** 2))

        print("Removing {}:".format(descriptorlist[x]))
        print("The average RMSE was " + str(avgRMS))
        print("The median RMSE was " + str(medRMS))
        print("The max RMSE was " + str(maxRMS))
        print("The min RMSE was " + str(minRMS))
        print("The std deviation of the RMSE values was " + str(sd))
        print("")

        overall_rms_list.append(avgRMS)
        sd_list.append(sd)

    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots()
    rects = ax.bar(np.arange(9), overall_rms_list, color='r', yerr=sd_list)
    ax.set_xlabel('Descriptor Removed')
    ax.set_ylabel('200x 5-fold RMSE')
    ax.set_title('Descriptor Importance')
    ax.set_xticks(np.arange(9) + .4)
    ax.set_xticklabels(descriptorlist)

    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                '%.2f' % (height),
                ha='center', va='bottom')

    fig.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight')
    plt.show()
    plt.close()
Code example #28
import data_parser
import matplotlib.pyplot as plt
from sklearn import cross_validation
from sklearn.metrics import mean_squared_error
from evolutionary_search import EvolutionaryAlgorithmSearchCV
from sklearn.kernel_ridge import KernelRidge
import random


X = ["descriptor1", "descriptor2"]  # ... list every descriptor column name here
Y1 = "response variable"   # training response column
Y2 = "response variable"   # testing response column (may differ from Y1)

datapath = "training data path"
testdatapath = "testing data path"
savepath = '{}.png'

data = data_parser.parse(datapath)
data.set_x_features(X)
data.set_y_feature(Y1)
data.remove_all_filters()
# add filters for training data

testdata = data_parser.parse(testdatapath)
testdata.set_x_features(X)
testdata.set_y_feature(Y2)
testdata.remove_all_filters()
# add filters for testing data

Ydata = data.get_y_data()
Xdata = data.get_x_data()
Ydata_test = testdata.get_y_data()
Xdata_test = testdata.get_x_data()
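
The EvolutionaryAlgorithmSearchCV import in this template is never used; a sketch of how it is typically wired under the sklearn-deap API (grid values here are illustrative, not tuned):

param_grid = {'alpha': [1e-4, 1e-3, 1e-2], 'gamma': [.1, .5, 1.0]}
search = EvolutionaryAlgorithmSearchCV(estimator=KernelRidge(kernel='laplacian'),
                                       params=param_grid,
                                       scoring='neg_mean_squared_error',
                                       cv=5,
                                       population_size=50,
                                       generations_number=10,
                                       verbose=1)
search.fit(Xdata, Ydata)
print(search.best_params_, search.best_score_)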
Code example #29
def flfxex(model=KernelRidge(alpha=.00139,
                             coef0=1,
                             degree=3,
                             gamma=.518,
                             kernel='rbf',
                             kernel_params=None),
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=[
               "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
               "N(log(fluence)", "N(log(flux)", "N(Temp)"
           ],
           Y="delta sigma"):

    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    fluence_divisions = [3.3E18, 3.3E19, 3.3E20]
    flux_divisions = [5e11, 2e11, 1e11]

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(fluence_divisions)):
        model = model  # note: no-op; the same estimator is refit for each division
        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '<', fluence_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '>=', fluence_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(
            mean_squared_error(Ypredict,
                               np.asarray(data.get_y_data()).ravel()))

        matplotlib.rcParams.update({'font.size': 26})
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (MPa)')
        ax[x].set_ylabel('Predicted ∆sigma (MPa)')
        ax[x].set_title('Testing Fluence > {}'.format(fluence_divisions[x]))
        ax[x].text(.1,
                   .88,
                   'RMSE: {:.3f}'.format(RMSE),
                   fontsize=30,
                   transform=ax[x].transAxes)
        ax[x].text(.1,
                   .83,
                   'Train: {}, Test: {}'.format(l_train, l_test),
                   transform=ax[x].transAxes)

    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("fluence_extrapolation"),
                dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(flux_divisions)):
        model = model  # note: no-op; the same estimator is refit for each division
        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '>', flux_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())

        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '<=', flux_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(
            mean_squared_error(Ypredict,
                               np.asarray(data.get_y_data()).ravel()))

        matplotlib.rcParams.update({'font.size': 26})
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (MPa)')
        ax[x].set_ylabel('Predicted ∆sigma (MPa)')
        ax[x].set_title('Testing Flux < {:.0e}'.format(flux_divisions[x]))
        ax[x].text(.1,
                   .88,
                   'RMSE: {:.3f}'.format(RMSE),
                   fontsize=30,
                   transform=ax[x].transAxes)
        ax[x].text(.1,
                   .83,
                   'Train: {}, Test: {}'.format(l_train, l_test),
                   transform=ax[x].transAxes)

    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("flux_extrapolation"),
                dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()
Code example #30
def execute(model,
            data,
            savepath,
            csvlist="",
            xfieldlist="",
            yfieldlist="",
            xerrfieldlist="",
            yerrfieldlist="",
            xlabel="",
            ylabel="",
            labellist="",
            plotlabel="overlay",
            guideline=0,
            sizes=None,
            faces=None,
            markers=None,
            linestyles=None,
            outlines=None,
            timex="",
            stepsize="1.0",
            startx=None,
            endx=None,
            whichyaxis="",
            *args,
            **kwargs):
    """Overlay plots
        Args:
            csvlist <str>: comma-delimited list of csv names
                            Currently only supports two csvs. 
            xfieldlist <str>: comma-delimited list of x-field names, to
                                match with csvlist
            xerrfieldlist <str>: comma-delimited list of x error field names, to
                                match with csvlist
            yfieldlist <str>: comma-delimited list of y-field names, to
                                match with csvlist
            yerrfieldlist <str>: comma-delimited list of y error field names, to
                                match with csvlist
    """
    stepsize = float(stepsize)
    csvs = csvlist.split(",")
    print(csvs)
    xfields = xfieldlist.split(",")
    yfields = yfieldlist.split(",")

    if not (len(csvs) == len(xfields)):
        print("Length of x field list not match length of csv list.")
        print("Exiting.")
        return
    if not (len(csvs) == len(yfields)):
        print("Length of y field list does not match length of csv list.")
        print("Exiting.")
        return
    if len(xerrfieldlist) > 0:
        xerrfields = xerrfieldlist.split(",")
        if not (len(xerrfields) == len(xfields)):
            print(
                "Length of x error field list does not match length of x field list."
            )
            print("Exiting.")
            return
    else:
        xerrfields = list()
    if len(yerrfieldlist) > 0:
        yerrfields = yerrfieldlist.split(",")
        if not (len(yerrfields) == len(yfields)):
            print(
                "Length of y error field list does not match length of y field list."
            )
            print("Exiting.")
            return
    else:
        yerrfields = list()

    xdatas = list()
    ydatas = list()
    xerrs = list()
    yerrs = list()
    for pidx in range(0, len(csvs)):
        print("Getting data from %s" % csvs[pidx])
        data = data_parser.parse(csvs[pidx].strip())
        xdata = np.asarray(data.get_data(xfields[pidx].strip())).ravel()
        ydata = np.asarray(data.get_data(yfields[pidx].strip())).ravel()
        xerrdata = None
        yerrdata = None
        if len(xerrfields) > 0:
            xerrfield = xerrfields[pidx].strip()
            if not (xerrfield == ""):
                xerrdata = np.asarray(data.get_data(xerrfield)).ravel()
        if len(yerrfields) > 0:
            yerrfield = yerrfields[pidx].strip()
            if not (yerrfield == ""):
                yerrdata = np.asarray(data.get_data(yerrfield)).ravel()
        xdatas.append(xdata)
        ydatas.append(ydata)
        xerrs.append(xerrdata)
        yerrs.append(yerrdata)
    if xlabel == "":
        xlabel = "%s" % xfields
    if ylabel == "":
        ylabel = "%s" % yfields
    if labellist == "":
        labellist = list()
        for csvname in csvs:
            labellist.append(os.path.basename(csvname).split(".")[0])  # label each curve after its own csv
    else:
        labellist = labellist.split(",")
    kwargs = dict()  # start a fresh kwargs dict; any incoming **kwargs are ignored
    kwargs['xdatalist'] = xdatas
    kwargs['ydatalist'] = ydatas
    kwargs['labellist'] = labellist
    kwargs['xlabel'] = xlabel
    kwargs['ylabel'] = ylabel
    kwargs['xerrlist'] = xerrs
    kwargs['yerrlist'] = yerrs
    kwargs['stepsize'] = stepsize
    kwargs['savepath'] = savepath
    kwargs['plotlabel'] = plotlabel
    kwargs['guideline'] = guideline
    if not (faces is None):
        kwargs['faces'] = faces
    if not (outlines is None):
        kwargs['outlines'] = outlines
    if not (sizes is None):
        kwargs['sizes'] = sizes
    if not (markers is None):
        kwargs['markers'] = markers
    if not (linestyles is None):
        kwargs['linestyles'] = linestyles
    kwargs['timex'] = timex
    kwargs['startx'] = startx
    kwargs['endx'] = endx
    notelist = list()
    kwargs['notelist'] = notelist
    kwargs['whichyaxis'] = whichyaxis
    #for key,value in kwargs.items():
    #    print(key,":",value)
    print("Plotting.")
    plotxy.multiple_overlay(**kwargs)
    return
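
For orientation, here is a hypothetical invocation of the overlay routine above; the csv names, field names, and save path are illustrative only, and the model and data arguments are never read before being reassigned inside the function.

execute(None, None, "../plots/{}.png",
        csvlist="ivar_results.csv,lwr_results.csv",
        xfieldlist="log(fluence),log(fluence)",
        yfieldlist="delta sigma,delta sigma",
        labellist="IVAR,LWR",
        guideline=1)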
Code example #31
File: main2.py Project: WesolyBanan/ZIwL
def do_everything(file_name, first_solution_strategy, go_back_to_depot=True):
    # Create the data.
    # data = create_data_array()
    data = data_parser.parse(file_name)
    locations = data[0]
    travel_times = data[1]

    demands_pal = data[2]
    demands_kg = data[3]

    start_times = data[4]
    end_times = data[5]

    vehicles_pal = data[6]
    vehicles_kg = data[7]
    vehicles_cost = data[8]
    vehicles_maxkm = data[9]

    multi_start = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    multi_end = [0, 1, 18, 50, 0, 0, 1, 18, 50, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 1, 18, 50, 0]

    num_locations = len(locations)
    num_vehicles = len(vehicles_pal)
    depot = 0
    search_time_limit_ms = 300000
    local_search_max_time_ms = 3000

    # Result variables
    result = []
    tmp_route = []
    h_route_dist = 0
    h_route_time = 0
    tmp_orders = []

    # Create routing model.
    if num_locations > 0:

        # The number of nodes of the VRP is num_locations.
        # Nodes are indexed from 0 to num_locations - 1. By default the start of
        # a route is node 0.
        if go_back_to_depot:
            routing = pywrapcp.RoutingModel(num_locations, num_vehicles, depot)
        else:
            routing = pywrapcp.RoutingModel(num_locations, num_vehicles, multi_start, multi_end)
        search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
        search_parameters.first_solution_strategy = first_solution_strategy
        search_parameters.time_limit_ms = search_time_limit_ms
        search_parameters.lns_time_limit_ms = local_search_max_time_ms
        # print(search_parameters)


        ############################# Callbacks to the distance function and travel time functions here.
        cost_callback_matrix = []
        for v_id in range(len(vehicles_cost)):
            cost_between_locations = CreateCostCallback(locations, vehicles_cost[v_id])
            cost_callback = cost_between_locations.Cost
            cost_callback_matrix.append(cost_callback)
            routing.SetArcCostEvaluatorOfVehicle(cost_callback, v_id)


        ############################# Adding pallets dimension constraints.
        demands_pal_at_orders = CreateDemandPalCallback(demands_pal)
        demands_pal_callback = demands_pal_at_orders.DemandPal

        NullCapacitySlack = 0
        fix_start_cumul_to_zero = True
        pallets = "Pallets"

        routing.AddDimensionWithVehicleCapacity(demands_pal_callback, NullCapacitySlack, vehicles_pal, fix_start_cumul_to_zero, pallets)

        ############################# Adding weight dimension constraints.
        demands_kg_at_orders = CreateDemandKgCallback(demands_kg)
        demands_kg_callback = demands_kg_at_orders.DemandKg

        NullCapacitySlack = 0
        fix_start_cumul_to_zero = True
        kilograms = "Kilograms"

        routing.AddDimensionWithVehicleCapacity(demands_kg_callback, NullCapacitySlack, vehicles_kg, fix_start_cumul_to_zero, kilograms)

        ############################# Adding kmlimit dimension constraints.
        demands_kms_at_orders = CreateCostCallback(locations, 1)
        demands_kms_callback = demands_kms_at_orders.Cost

        NullCapacitySlack = 0
        fix_start_cumul_to_zero = True
        kilometers = "Kilometers"

        routing.AddDimensionWithVehicleCapacity(demands_kms_callback, NullCapacitySlack, vehicles_maxkm, fix_start_cumul_to_zero, kilometers)

        ############################## Add time dimension.
        day = max(end_times)
        time = "Time"

        travel_time_between_locations = CreateTravelTimeCallback(travel_times)
        travel_time_callback = travel_time_between_locations.TravelTime

        routing.AddDimension(travel_time_callback, day, day, fix_start_cumul_to_zero, time)


        ############################# Add time window constraints.
        time_dimension = routing.GetDimensionOrDie(time)
        for location in range(1, num_locations):
            start = start_times[location]
            end = end_times[location]
            time_dimension.CumulVar(location).SetRange(start, end)

        ############################ Solve displays a solution if any.
        assignment = routing.SolveWithParameters(search_parameters)
        if assignment:
            size = len(locations)
            # Solution cost.
            # print("Total cost of all routes: " + str(assignment.ObjectiveValue()/1000) + "\n")
            result.append(assignment.ObjectiveValue()/1000)
            result.append(0)
            # Inspect solution.
            pallets_dimension = routing.GetDimensionOrDie(pallets)
            kilograms_dimension = routing.GetDimensionOrDie(kilograms)
            kilometers_dimension = routing.GetDimensionOrDie(kilometers)
            time_dimension = routing.GetDimensionOrDie(time)

            for vehicle_nbr in range(num_vehicles):
                index = routing.Start(vehicle_nbr)
                # plan_output = 'Vehicle {0}:'.format(vehicle_nbr)

                while not routing.IsEnd(index):
                    node_index = routing.IndexToNode(index)
                    time_var = time_dimension.CumulVar(index)
                    tmp_orders.append([node_index-1, assignment.Min(time_var), assignment.Max(time_var)])
                    kilometers_var = kilometers_dimension.CumulVar(index)
                    time_var = time_dimension.CumulVar(index)
                    h_route_dist = assignment.Value(kilometers_var) / 1000
                    h_route_time = assignment.Min(time_var)
                    # plan_output += " Order {node_index} Time({tmin}, {tmax}) -> ".format(
                    #                 node_index=node_index,
                    #                 tmin=str(assignment.Min(time_var)),
                    #                 tmax=str(assignment.Max(time_var)))
                    index = assignment.Value(routing.NextVar(index))

                node_index = routing.IndexToNode(index)
                pallets_var = pallets_dimension.CumulVar(index)
                kilograms_var = kilograms_dimension.CumulVar(index)
                kilometers_var = kilometers_dimension.CumulVar(index)
                time_var = time_dimension.CumulVar(index)

                # plan_output += " {node_index} Load({load}) Time({tmin}, {tmax})".format(
                #                 node_index=node_index,
                #                 load=assignment.Value(load_var),
                #                 tmin=str(assignment.Min(time_var)),
                #                 tmax=str(assignment.Max(time_var)))
                # print(plan_output)
                # print("\n")

                tmp_orders.append([node_index-1, assignment.Min(time_var), assignment.Max(time_var)])

                tmp_route.append(vehicle_nbr)

                tmp_route.append(assignment.Value(kilometers_var) / 1000 * vehicles_cost[vehicle_nbr])
                tmp_route.append(assignment.Value(kilometers_var) / 1000)
                tmp_route.append(assignment.Min(time_var))

                result[1] += h_route_dist * vehicles_cost[vehicle_nbr]

                tmp_route.append(h_route_dist * vehicles_cost[vehicle_nbr])
                tmp_route.append(h_route_dist)
                tmp_route.append(h_route_time)

                tmp_route.append(assignment.Value(pallets_var)/100)
                tmp_route.append(vehicles_pal[vehicle_nbr]/100)
                tmp_route.append(assignment.Value(pallets_var) / vehicles_pal[vehicle_nbr])

                tmp_route.append(assignment.Value(kilograms_var))
                tmp_route.append(vehicles_kg[vehicle_nbr])
                tmp_route.append(assignment.Value(kilograms_var) / vehicles_kg[vehicle_nbr])

                tmp_route.append(tmp_orders)

                if len(tmp_orders) > 2:
                    result.append(tmp_route)

                tmp_orders = []
                tmp_route = []

            return result

        else:
            print(str(first_solution_strategy) + ': no solution found.')
            return [float('inf')]
    else:
        print('Specify an instance greater than 0.')
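
A hypothetical driver for the routine above, assuming the legacy ortools.constraint_solver API that this code is written against; the instance file name is illustrative.

from ortools.constraint_solver import routing_enums_pb2

result = do_everything('instance.csv',
                       routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC,
                       go_back_to_depot=True)
print(result[0])  # objective value of all routes, scaled by 1/1000 as above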
Code example #32
File: KFold_CV.py Project: JeritGeorge/DBTT
def cv(model, datapath, savepath, num_folds=5, num_runs=200,
       X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(log(fluence)", "N(log(flux)", "N(Temp)"],
       Y="delta sigma"):

    # get data
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    Ydata = np.asarray(data.get_y_data()).ravel()
    Xdata = np.asarray(data.get_x_data())

    Y_predicted_best = []
    Y_predicted_worst = []

    # Start at -inf/+inf so the first run always registers as both the
    # current best and worst.
    maxRMS = float('-inf')
    minRMS = float('inf')

    RMS_List = []
    for n in range(num_runs):
        # sklearn.cross_validation is the legacy API (model_selection.KFold in 0.18+)
        kf = cross_validation.KFold(len(Xdata), n_folds=num_folds, shuffle=True)
        K_fold_rms_list = []
        Overall_Y_Pred = np.zeros(len(Xdata))
        # split into testing and training sets
        for train_index, test_index in kf:
            X_train, X_test = Xdata[train_index], Xdata[test_index]
            Y_train, Y_test = Ydata[train_index], Ydata[test_index]
            # train on the training set; fit() retrains the estimator from scratch
            model.fit(X_train, Y_train)
            Y_test_Pred = model.predict(X_test)
            rms = np.sqrt(mean_squared_error(Y_test, Y_test_Pred))
            K_fold_rms_list.append(rms)
            Overall_Y_Pred[test_index] = Y_test_Pred

        RMS_List.append(np.mean(K_fold_rms_list))

        if np.mean(K_fold_rms_list) > maxRMS:
            maxRMS = np.mean(K_fold_rms_list)
            Y_predicted_worst = Overall_Y_Pred

        if np.mean(K_fold_rms_list) < minRMS:
            minRMS = np.mean(K_fold_rms_list)
            Y_predicted_best = Overall_Y_Pred

    avgRMS = np.mean(RMS_List)
    medRMS = np.median(RMS_List)
    sd = np.std(RMS_List)

    print("Using {}x {}-Fold CV: ".format(num_runs, num_folds))
    print("The average RMSE was {:.3f}".format(avgRMS))
    print("The median RMSE was {:.3f}".format(medRMS))
    print("The max RMSE was {:.3f}".format(maxRMS))
    print("The min RMSE was {:.3f}".format(minRMS))
    print("The std deviation of the RMSE values was {:.3f}".format(sd))

    f, ax = plt.subplots(1, 2, figsize=(11, 5))
    ax[0].scatter(Ydata, Y_predicted_best, c='black', s=10)
    ax[0].plot(ax[0].get_ylim(), ax[0].get_ylim(), ls="--", c=".3")
    ax[0].set_title('Best Fit')
    ax[0].text(.1, .88, 'Min RMSE: {:.3f}'.format(minRMS), transform=ax[0].transAxes)
    ax[0].text(.1, .83, 'Mean RMSE: {:.3f}'.format(avgRMS), transform=ax[0].transAxes)
    ax[0].set_xlabel('Measured (MPa)')
    ax[0].set_ylabel('Predicted (MPa)')

    ax[1].scatter(Ydata, Y_predicted_worst, c='black', s=10)
    ax[1].plot(ax[1].get_ylim(), ax[1].get_ylim(), ls="--", c=".3")
    ax[1].set_title('Worst Fit')
    ax[1].text(.1, .88, 'Max RMSE: {:.3f}'.format(maxRMS), transform=ax[1].transAxes)
    ax[1].set_xlabel('Measured (MPa)')
    ax[1].set_ylabel('Predicted (MPa)')

    f.tight_layout()
    f.savefig(savepath.format("cv_best_worst"), dpi=200, bbox_inches='tight')
    plt.show()
    plt.close()
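
A hypothetical driver for the CV routine above; the estimator choice and the paths are illustrative only.

from sklearn.kernel_ridge import KernelRidge

cv(KernelRidge(kernel='laplacian'),
   datapath="DBTT_Data.csv",
   savepath="plots/{}.png",
   num_folds=5,
   num_runs=200)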
Code example #33
from functools import reduce
from operator import add


def main():
    # Class Negative Data: healthy controls, runs MS_A_1 .. MS_A_7
    negatives = [data_parser.parse('Datasets/Healthy Controls/MS_A_{}.mzml'.format(i))
                 for i in range(1, 8)]

    # Class Positive Data: PC diagnosed, runs MS_B_1 .. MS_B_7
    positives = [data_parser.parse('Datasets/PC Diagnosed/MS_B_{}.mzml'.format(i))
                 for i in range(1, 8)]

    # Same left-to-right concatenation as the original d1 + d2 + ... + d14
    full_data = reduce(add, negatives + positives)
    param = []
    data = preprocessing.get_preprocessed_data(full_data, param)
    # train_test_model(data, param)
    cross_validate(data, param)
Code example #34
        weights_hidden = np.array(
            weights.eval(session=sess))  # Weight values to the hidden layer
        weights_output = np.array(weights2.eval(
            session=sess)).transpose()  # Weight values to the output layer
        effect = np.average(
            weights_hidden * weights_output,
            axis=1)  # (hidden layer weight) * (output layer weight)
        print(effect)
        abs_effect = np.average(np.abs(weights_hidden * weights_output),
                                axis=1)  # absolute value of the overall weight
        return do_eval(sess, eval_loss, X_placeholder, Y_placeholder,
                       data_test, predicted_value, effect,
                       abs_effect)  # Predicts

featdat, dat, data = data_parser.parse("DBTT_Data19.csv")
X = [
    "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))"
]
X_LWR = [
    "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))"
]

Y = "CD delta sigma"
data.set_x_features(X)
data.set_y_feature(Y)

model = 300
lwr_datapath = "CD_LWR_clean7.csv"
##data.add_exclusive_filter("Alloy", '=', 29)
##data.add_exclusive_filter("Alloy", '=', 14)
Code example #36
                parameter_values.append(config.getboolean('AllTests', parameter))
        else:
            if config.has_option(case_name, parameter):
                parameter_values.append(config.get(case_name, parameter))
            else:
                parameter_values.append(config.get('AllTests', parameter))

    model, data_path, save_path, y_data, x_data, lwr_data_path, weights = parameter_values

    if "CD" in y_data or "EONY" in y_data:
        save_path = save_path.format(y_data.split(' ',1)[0] + '_{}')

    model = importlib.import_module(model).get()
    x_data = x_data.split(',')

    data = data_parser.parse(data_path, weights)
    data.set_x_features(x_data)
    data.set_y_feature(y_data)

    data.add_exclusive_filter("Temp (C)", '<>', 290)
    data.overwrite_data_w_filtered_data()

    lwr_data = data_parser.parse(lwr_data_path)
    if not y_data == "delta sigma":
        lwr_data.set_x_features(x_data)
        lwr_data.set_y_feature(y_data)

    if y_data == "CD delta sigma":
        data.add_exclusive_filter("Alloy",'=', 29)
        data.add_exclusive_filter("Alloy",'=', 8)
        data.add_exclusive_filter("Alloy", '=', 1)
Code example #37
from data_parser import parse

if __name__ == "__main__":
    #x=parse("/media/asdazey/PSCSTA/Judge/",True)
    x = parse("/home/asdazey/Desktop/PSCSTA/aiden/repeat/repeat.in", True)
    print(x)
Code example #38
from data_parser import parse


def find(n):
    # Walk the anti-diagonals of the 1, 2, 3, ... grid enumeration: inside
    # iteration i, `last` is the first index on diagonal i - 1 and `current`
    # the first index on diagonal i.
    n = int(n)
    last = 0
    current = 1
    for i in range(1000000):
        last = current
        current += i
        if current > n:
            # n lies on the diagonal where row + col == i - 1, at offset
            # (n - last) along it.
            return [(i - 1 - (n - last)), (n - last)]


if __name__ == "__main__":
    #x=parse("/home/asdazey/Desktop/PSCSTA/logan/it/infinite.in")
    x = parse("/media/asdazey/PSCSTA/Judge/infinite.in")
    #print(x)

    for i in range(int(x[0]) + 1):
        cord = find(x[i + 1])
        #print(x[i+1])
        #print(cord)

        if cord[0] == 0 and cord[1] == 0:
            print(2)
        elif cord[0] == 0 or cord[1] == 0:
            print(3)
        else:
            print(4)
Code example #39
        weights_hidden = np.array(
            weights.eval(session=sess))  # Weight values to the hidden layer
        weights_output = np.array(weights2.eval(
            session=sess)).transpose()  # Weight values to the output layer
        effect = np.average(
            weights_hidden * weights_output,
            axis=1)  # (hidden layer weight) * (output layer weight)
        print(effect)
        abs_effect = np.average(np.abs(weights_hidden * weights_output),
                                axis=1)  # absolute value of the overall weight
        return do_eval(sess, eval_loss, X_placeholder, Y_placeholder,
                       data_test, predicted_value, effect,
                       abs_effect)  # Predicts

featdat, dat, data = data_parser.parse("DBTT_Data19.csv")
X = [
    "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))"
]
X_LWR = [
    "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))"
]

Y = "delta sigma"
data.set_x_features(X)
data.set_y_feature(Y)

lwr_datapath = "CD_LWR_clean7.csv"
##data.add_exclusive_filter("Alloy", '=', 29)
##data.add_exclusive_filter("Alloy", '=', 14)
##data.add_exclusive_filter("Temp (C)", '<>', 290)
Code example #40
__author__ = 'haotian'

import data_parser as dp


data = dp.parse('../DBTT/DBTT_Data14.5.csv')
data.normalization(
    ['Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)', 'Si (At%)', 'C (At%)'],
    normalization_type='t')
data.normalization(['log(fluence)', 'log(eff fluence)', 'log(flux)', 'Temp (C)', 'log(time)'])
data.std_normalization(['delta sigma', 'EONY predicted', 'CD predicted (Mpa)'])
data.output('../DBTT/DBTT_Data15.csv')
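
data_parser's normalization internals are not shown in this listing, so the following is only an assumed illustration of the two kinds of column scaling invoked above (a bounded rescaling and a z-score standardization), not the actual implementation:

import numpy as np

def minmax(col):
    # Rescale a column to [0, 1] -- assumed stand-in for data.normalization().
    col = np.asarray(col, dtype=float)
    return (col - col.min()) / (col.max() - col.min())

def zscore(col):
    # Zero mean, unit variance -- assumed stand-in for data.std_normalization().
    col = np.asarray(col, dtype=float)
    return (col - col.mean()) / col.std()

print(minmax([250.0, 270.0, 290.0]))  # [0.  0.5 1. ]
print(zscore([250.0, 270.0, 290.0]))  # [-1.2247  0.  1.2247] (approximately)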